artext 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: dab606c0c96c6939da80ddf9e5ae61ecb905d90f
4
+ data.tar.gz: 7df963e4a642b499c776b844e2da9901e58dfefb
5
+ SHA512:
6
+ metadata.gz: 13819faa7e432065a235b69ad2bcd326015716dff27e1bea9b8b7929e3c4b2d0a0ca77e2b16ee1f276714fe07bf066f14b563ad9c1d5e762859c414b7e7991dc
7
+ data.tar.gz: 8e8c98d7d52e137272181661a30921ef9d49af6cfd419cdbc9fb78c1b8f484bea879c93a2fac970c3a59d76bc1643df3a6f308b9436a480b05fa313c82605b1f
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
source "https://rubygems.org"

# All dependencies of this gem are declared in artext.gemspec.
gemspec
@@ -0,0 +1,54 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ artext (0.0.1)
5
+ addressable (~> 2.3)
6
+ fastimage (~> 1.6)
7
+ httparty (~> 0.13)
8
+ mini_magick (~> 3.7)
9
+ nokogiri (~> 1.6)
10
+
11
+ GEM
12
+ remote: https://rubygems.org/
13
+ specs:
14
+ addressable (2.3.8)
15
+ diff-lcs (1.2.5)
16
+ fastimage (1.7.0)
17
+ addressable (~> 2.3, >= 2.3.5)
18
+ httparty (0.13.5)
19
+ json (~> 1.8)
20
+ multi_xml (>= 0.5.2)
21
+ json (1.8.3)
22
+ mini_magick (3.8.1)
23
+ subexec (~> 0.2.1)
24
+ mini_portile (0.6.2)
25
+ multi_xml (0.5.5)
26
+ nokogiri (1.6.6.2)
27
+ mini_portile (~> 0.6.0)
28
+ rake (10.4.2)
29
+ rspec (3.3.0)
30
+ rspec-core (~> 3.3.0)
31
+ rspec-expectations (~> 3.3.0)
32
+ rspec-mocks (~> 3.3.0)
33
+ rspec-core (3.3.2)
34
+ rspec-support (~> 3.3.0)
35
+ rspec-expectations (3.3.1)
36
+ diff-lcs (>= 1.2.0, < 2.0)
37
+ rspec-support (~> 3.3.0)
38
+ rspec-mocks (3.3.2)
39
+ diff-lcs (>= 1.2.0, < 2.0)
40
+ rspec-support (~> 3.3.0)
41
+ rspec-support (3.3.0)
42
+ subexec (0.2.3)
43
+
44
+ PLATFORMS
45
+ ruby
46
+
47
+ DEPENDENCIES
48
+ artext!
49
+ bundler (~> 1.6)
50
+ rake (~> 10.0)
51
+ rspec
52
+
53
+ BUNDLED WITH
54
+ 1.10.6
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2015 Anindya Mondal
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,31 @@
1
+ # Artext
2
+
3
+ Artext extracts the main article and related metadata (title, image, tags, authors and publication date) from a web page.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'artext'
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install artext
20
+
21
+ ## Usage
22
+
23
+ Call `Artext.extract(url)` with the address of a web page; it returns a hash with the keys `:url`, `:data` and `:article`.
24
+
25
+ ## Contributing
26
+
27
+ 1. Fork it ( https://github.com/Anindya91/artext/fork )
28
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
29
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
30
+ 4. Push to the branch (`git push origin my-new-feature`)
31
+ 5. Create a new Pull Request
@@ -0,0 +1,9 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
# Register the RSpec rake task with its default settings.
RSpec::Core::RakeTask.new

# Both `rake` and `rake test` depend on the :spec task.
task default: :spec
task test: :spec
8
+
9
+
@@ -0,0 +1,31 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'artext/version'
5
+
6
# Gem specification for artext. The packaged file list is taken from the
# files tracked by git at build time.
Gem::Specification.new do |gem|
  gem.name          = "artext"
  gem.version       = Artext::VERSION
  gem.authors       = ["Anindya Mondal"]
  gem.email         = ["anindyamondal@mazdigital.com"]
  gem.summary       = "Extract article from websites."
  gem.description   = "Extract article and other metadata from websites."
  gem.homepage      = "https://github.com/Anindya91/artext"
  gem.license       = "MIT"

  gem.files         = `git ls-files -z`.split("\x0")
  gem.executables   = gem.files.grep(%r{^bin/}) { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  # Development-only dependencies.
  [
    ["bundler", "~> 1.6"],
    ["rake", "~> 10.0"],
    ["rspec", "~> 3.3"]
  ].each { |name, requirement| gem.add_development_dependency(name, requirement) }

  # Runtime dependencies.
  [
    ["addressable", "~> 2.3"],
    ["httparty", "~> 0.13"],
    ["fastimage", "~> 1.6"],
    ["mini_magick", "~> 3.7"],
    ["nokogiri", "~> 1.6"]
  ].each { |name, requirement| gem.add_dependency(name, requirement) }
end
@@ -0,0 +1,384 @@
1
+ require "artext/version"
2
+ require "open-uri"
3
+ require "httparty"
4
+ require "nokogiri"
5
+ require "mini_magick"
6
+ require "fastimage"
7
+
8
+ module Artext
9
+
10
# User-Agent header sent with every page request. Frozen so the shared
# constant cannot be mutated by callers.
USER_AGENT = "facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)".freeze
11
+
12
# Fetches +url+ and extracts page metadata plus the main article.
#
# A URL without an http/https scheme gets "http://" prepended. URLs that do
# not look like a host name with a 2-5 letter TLD are rejected without any
# network access.
#
# @param url [String] page address, with or without scheme
# @return [Hash] {:url, :data, :article}; :data and :article are
#   single-element arrays, or empty arrays when the URL is rejected
def self.extract(url)
  url = "http://#{url}" unless url =~ /\Ahttps?:\/\//i
  # \A and \z anchor the whole string, so a multi-line input cannot pass
  # validation through a single well-formed line.
  valid_url = /\A(http|https):\/\/[a-z0-9]+([\-\.]{1}[a-z0-9]+)*\.[a-z]{2,5}(:[0-9]{1,5})?(\/.*)?\z/ix
  return {:url => url, :data => [], :article => []} if (url =~ valid_url).nil?

  begin
    res = HTTParty.get(url, headers: {"User-Agent" => USER_AGENT})
    raise "Unable to crawl URL" if res.code != 200
    doc = Nokogiri::HTML(res.body)
  rescue StandardError
    # Second attempt through open-uri when HTTParty fails or the status is
    # not 200. URI#open is used instead of Kernel#open so the string is
    # never interpreted as anything but a URL.
    doc = Nokogiri::HTML(URI.parse(url).open("User-Agent" => USER_AGENT))
  end

  data = get_data_from_url(doc, url)
  article = get_article_from_url(doc, url, data[:type])
  # A score of exactly 1 is what get_article_from_url returns for a bare image.
  data[:type] = "image" if article[:score] == 1
  {:url => url, :data => [data], :article => [article]}
end
27
+
28
# Collects Open Graph style metadata from a parsed page.
#
# @param doc [Nokogiri::HTML::Document] parsed page
# @param url [String] address of the page; used to resolve relative image
#   URLs and to build the favicon lookup URL
# @return [Hash] {:image, :title, :tags, :type, :favicon, :theme}
def self.get_data_from_url(doc, url)
  og_images = []
  doc.search("//meta[@property='og:image' or @name='og:image']").each do |meta|
    image = meta["content"]
    next if is_blank?(image)
    if image =~ /^\/\/(.)*/
      # Protocol-relative URL: borrow the scheme of the page.
      image = "#{URI.parse(url).scheme}:#{image}"
    elsif image =~ /^\/(.)*/
      # Host-relative URL: prefix scheme and host of the page.
      uri = URI.parse(url)
      image = File.join("#{uri.scheme}://#{uri.host}", image)
    end
    og_images << image
  end
  # Try to get the best image based on heuristics
  image = get_best_image(og_images)

  clip_title = nil
  og_title = doc.search("//meta[@property='og:title' or @name='og:title']")
  if !is_blank?(og_title) && !is_blank?(og_title[0]["content"])
    clip_title = og_title[0]["content"]
  else
    page_title = doc.search("//title")[0]
    clip_title = page_title.text unless is_blank?(page_title)
  end

  tag_metas = doc.xpath('//meta[contains(@name, "tag") or contains(@name, "keyword") or contains(@property, "tag") or contains(@property, "keyword")]')
  # uniq (not uniq!) so a list without duplicates stays an array; uniq!
  # returns nil when nothing was removed.
  tags = tag_metas.map { |meta| meta["content"] }
                  .compact
                  .flat_map { |content| content.split(',') }
                  .map(&:strip)
                  .uniq

  type = doc.search("//meta[@property='og:type' or @name='og:type']")
  type = is_blank?(type) ? nil : type[0]["content"]

  favicon = "http://www.google.com/s2/favicons?domain_url=#{url}"
  theme = get_dominant_color(favicon)

  {:image => image, :title => clip_title, :tags => tags, :type => type, :favicon => favicon, :theme => theme}
end
70
+
71
# Extracts author names and a publication date from a parsed page.
#
# Authors come from <meta> tags named "author". The date is the first
# candidate whose first four characters are a number above 2000, looked up
# first in <meta> tags whose name/property contains "date" and then in any
# element carrying a +datetime+ attribute.
#
# @param doc [Nokogiri::HTML::Document] parsed page
# @return [Hash] {:date => String or nil, :authors => Array<String>}; the
#   date is formatted like "September 12, 2015" when it starts with
#   YYYY[-MM[-DD]], otherwise the whitespace-normalised raw value is returned
def self.get_author_and_date(doc)
  authors = doc.xpath('//meta[@property="author" or @name="author"]')
               .map { |meta| meta["content"] }
               .reject { |name| is_blank?(name) }

  # to_s guards against meta tags that have no content attribute at all.
  plausible = lambda { |value| value.to_s[0..3].to_i > 2000 }

  date = doc.xpath('//meta[contains(@property, "date") or contains(@name, "date")]')
            .map { |meta| meta["content"] }
            .find(&plausible)
  if date.nil?
    date = doc.xpath('//*[@datetime]')
              .map { |node| node["datetime"] }
              .find(&plausible)
  end

  unless date.nil?
    normalized = date.split.join(" ")
    begin
      parts = normalized[0..9].split("-").map { |part| Integer(part, 10) }
      t = Time.new(*parts)
      date = "#{Date::MONTHNAMES[t.month]} #{t.day}, #{t.year}"
    rescue ArgumentError
      # Not a dash-separated numeric date: keep what the page provided.
      date = normalized
    end
  end

  {:date => date, :authors => authors}
end
97
+
98
# Picks the most suitable image URL from a list of candidates.
#
# Candidates whose URL contains "logo" or "fallback" are discarded unless
# that would leave nothing. A GIF of at least 300x200 wins immediately;
# otherwise the widest image whose size could be determined is returned.
#
# @param images [Array<String>] candidate image URLs
# @return [String, nil] chosen URL, or nil when there are no candidates or
#   no candidate size could be read
def self.get_best_image(images)
  return nil if is_blank?(images)
  return images[0] if images.size == 1

  # reject logo or similar images
  candidates = images.reject { |image| image =~ /logo|fallback/i }
  return candidates[0] if candidates.size == 1
  candidates = images if is_blank?(candidates)

  dimensions = []
  candidates.each do |candidate|
    # One FastImage instance yields both type and size, so each image is
    # fetched once instead of once per attribute.
    info = FastImage.new(Addressable::URI.escape(candidate))
    size = info.size
    next if size.nil?
    return candidate if info.type == :gif && size[0] > 299 && size[1] > 199
    dimensions << {:x => size[0], :y => size[1], :image => candidate}
  end

  widest = dimensions.max_by { |dimension| dimension[:x] }
  widest.nil? ? nil : widest[:image]
end
115
+
116
# Locates the main article of a parsed page and renders it as simplified HTML.
#
# Lookup order: a single <article> element with at least 500 characters of
# text (score 0.9), the first sufficiently long of several <article>
# elements (0.8), then the find_article heuristics (0.6). The score drops by
# 0.5 when the page's og:type is not "article"; find_relevant may adjust it
# again.
#
# @param doc [Nokogiri::HTML::Document] parsed page
# @param url [String] page address; used as the image source when the
#   document has no HTML at all
# @param type [String, nil] og:type of the page
# @return [Hash] {:body, :text, :images, :score}, plus :date and :authors
#   when an article was found. Score 0 means nothing was found, score 1
#   means the URL is treated as a bare image.
def self.get_article_from_url(doc, url, type)
  dates = get_author_and_date(doc)
  article = doc.search("//article")
  score = 0.9
  article = [] if article.count == 1 && article.text.split.join(" ").length < 500
  if article.count > 1
    article = get_correct_article(article)
    score = 0.8
  end
  # Fall back to the class/attribute heuristics only when no usable
  # <article> element was found. The original condition was inverted, which
  # threw away every <article> match and never ran the fallback for pages
  # without one.
  if is_blank?(article)
    article = find_article(doc)
    score = 0.6
  end
  score -= 0.5 if type != "article"

  if is_blank?(article)
    html = begin
      doc.to_html
    rescue StandardError
      nil
    end
    # No serialisable HTML at all: treat the URL itself as an image.
    if is_blank?(html)
      return {:body => "<figure><img src=\"#{url}\"></figure>", :text => "", :images => [url], :score => 1}
    end
    return {:body => "", :text => "", :images => [], :score => 0}
  end

  article = remove_unwanted_items_from(article)
  article, score = find_relevant(article, score)
  if score > 0.9
    html, imgs = iteratively_clean(article, "", [], score)
  else
    html, imgs = recursively_clean(article, "", [], score)
  end
  {:body => html, :text => article.text.split.join(" "), :images => imgs, :score => score}.merge(dates)
end
150
+
151
# Walks upwards from the <p> elements inside +article+ looking for the
# ancestor that contains the most paragraphs.
#
# NOTE(review): `best_count - paragraph_total < 2` looks like it holds on
# the first pass for most inputs, which would end the walk immediately --
# confirm the intended threshold.
#
# @param article [Nokogiri::XML::Node, Nokogiri::XML::NodeSet] article root
# @param score [Float] current confidence score
# @return [Array] two values: the chosen node (or +article+ itself) and the
#   score, which is set to 0.95 when the walk stops early and score < 1
def self.find_relevant(article, score)
  paragraphs = article.search("p")
  paragraph_total = paragraphs.count
  best = article
  best_count = 0
  previous_best = 0
  return best, score if is_blank?(paragraphs)

  while paragraphs[0].text != article.text || paragraphs[0].attribute("class") != article.attribute("class")
    parents = paragraphs.map { |node| node.parent() }
    parents.each do |candidate|
      count = candidate.search("p").count
      next unless count > best_count
      best_count = count
      best = candidate
    end
    stalled = previous_best > 5 && previous_best == best_count
    if stalled || best_count - paragraph_total < 2
      score = 0.95 if score < 1
      break
    end
    previous_best = best_count
    paragraphs = parents
  end
  return best, score
end
176
+
177
# Renders each direct child of +element+ with get_element_html and
# concatenates the results.
#
# The +html+ and +images+ arguments are accepted for signature parity with
# recursively_clean but are not used: accumulation always starts empty.
#
# @param element [#children] node whose children are rendered
# @param score [Float] passed through to get_element_html
# @return [Array] two values: the HTML string and the de-duplicated image URLs
def self.iteratively_clean(element, html, images, score)
  markup = ""
  found = []
  element.children.each do |child|
    fragment, child_images = get_element_html(child, [], score)
    markup += fragment unless is_blank?(fragment)
    found += child_images unless is_blank?(child_images)
  end
  return markup, found.uniq
end
188
+
189
# Depth-first rendering of +element+ into simplified HTML.
#
# Childless nodes and nodes named in +leaf_tags+ are rendered through
# get_element_html. Headers, footers, short "meta" divs and (when score is
# above 0.9) section/ul/ol elements are dropped together with their
# subtree. Everything else is descended into.
#
# @param element [Nokogiri::XML::Node, Nokogiri::XML::NodeSet] current node
# @param html [String] HTML accumulated so far
# @param images [Array<String>] image URLs accumulated so far
# @param score [Float] confidence score steering the skip rules
# @return [Array] two values: accumulated HTML and image URLs
def self.recursively_clean(element, html, images, score)
  leaf_tags = ["p", "figure", "a", "h1", "h2", "h3", "h4", "text"]
  # A NodeSet has no tag name, so every name-based rule applies to single nodes only.
  single_node = element.class != Nokogiri::XML::NodeSet

  if is_blank?(element.children) || (single_node && leaf_tags.include?(element.name))
    fragment, found = get_element_html(element, [], score)
    html = html + fragment unless is_blank?(fragment)
    images = (images + found).uniq
    return html, images
  end

  skipped = single_node && (
    ["header", "footer"].include?(element.name) ||
    (element.name == "div" &&
      !is_blank?(element.attribute("class")) &&
      element.attribute("class").value.downcase.include?("meta") &&
      element.text.split.join(" ").length < 300) ||
    (["section", "ul", "ol"].include?(element.name) && score > 0.9)
  )
  return html, images if skipped

  element.children.each do |child|
    html, images = recursively_clean(child, html, images, score)
  end
  return html, images
end
215
+
216
# Renders a single node as simplified HTML.
#
# Links are dropped, all heading levels h1-h4 become <h2>, p/div go through
# extractp, figures through figurehandle, lists through listhandle, and
# bare text becomes an inline paragraph. Other node names yield nil.
#
# @param element [Nokogiri::XML::Node] node to render
# @param images [Array<String>] image URLs collected so far (not mutated)
# @param score [Float] confidence score, forwarded to extractp
# @return [Array] two values: HTML string (nil when nothing was rendered)
#   and the image URL list extended with any images found
def self.get_element_html(element, images, score)
  tv = nil
  case element.name
  when "a"
    # Eat it
  when "img"
    img = get_valid_image(element)
    unless is_blank?(img)
      tv = "<figure><img src=\"#{img}\"></figure>"
      images = images + [img]
    end
  when "h1", "h2", "h3", "h4"
    heading = element.text.split.join(" ")
    tv = "<h2>#{heading}</h2>" unless is_blank?(heading)
  when "p", "div"
    paragraph, found = extractp(element, score)
    tv = "<p>#{paragraph}</p>" unless is_blank?(paragraph)
    images = images + found unless is_blank?(found)
  when "figure"
    caption = element.search("figcaption").text.split.join(" ")
    caption = is_blank?(caption) ? "" : "<figcaption>#{caption}</figcaption>"
    tv, found = figurehandle(element, "", [])
    tv = "<figure>#{tv}#{caption}</figure>" unless is_blank?(tv)
    images = images + found
  when "text"
    tv = element.text.split.join(" ")
    # The attribute needs its "="; the original emitted <p class"inline">.
    tv = "<p class=\"inline\">#{tv}</p>" unless is_blank?(tv)
  when "i"
    tv = element.text.split.join(" ")
    tv = "<i>#{tv}</i>" unless is_blank?(tv)
  when "ol", "ul"
    tv, found = listhandle(element)
    images = images + found
  end
  return tv, images
end
249
+
250
# Returns the usable source URL of an image element.
#
# Attributes are tried in +search_in+ order and the first one with a
# non-empty value wins; for +srcset+ only the first URL of the list is
# used. Protocol-relative URLs get an "http:" prefix. The image must be
# larger than 100px in at least one dimension according to FastImage.
#
# @param element [Nokogiri::XML::Node, nil] image-like element
# @return [String] the image URL, or "" when none qualifies
def self.get_valid_image(element)
  return "" if is_blank?(element)

  source = nil
  search_in = ["data-image", "data-original", "srcset", "data-src", "datasrc", "rel:bf_image_src", "src"]
  search_in.each do |name|
    attr = element.attribute(name)
    next if attr.nil?
    candidate = attr.value
    # to_s keeps an empty srcset from raising on nil.
    candidate = candidate.split(",").first.to_s.split(" ")[0] if name == "srcset"
    # An empty attribute must not shadow a usable one further down the list.
    next if is_blank?(candidate)
    source = candidate
    break
  end
  return "" if is_blank?(source)

  source = "http:" + source if source[0..1] == "//"
  size = FastImage.size(Addressable::URI.escape(source))
  return source if !size.nil? && (size[0] > 100 || size[1] > 100)
  ""
end
272
+
273
# Renders the contents of a paragraph-like element through phandle.
#
# When every bit of the element's text comes from links and the score is
# below 0.8, the element is dropped and a bare nil is returned.
#
# @param element [Nokogiri::XML::Node] paragraph or div
# @param score [Float] confidence score
# @return [Array, nil] two values (inner HTML and image URLs), or nil when
#   the element is dropped
def self.extractp(element, score)
  links = element.search("a")
  link_only = !is_blank?(links) && element.text == links.text
  return nil if link_only && score < 0.8
  return nil, [] if is_blank?(element)

  phandle(element, "", [])
end
283
+
284
# Recursively flattens the inside of a paragraph into inline HTML.
#
# Children are processed first (links are only descended into when they
# contain an image), then the node itself contributes: images close and
# reopen the surrounding <p> around a <figure>, links become <a> tags, text
# is whitespace-normalised, <br> is kept, and a <p> that produced nothing
# falls back to its plain text.
#
# @param element [Nokogiri::XML::Node] current node
# @param html [String] inline HTML accumulated so far
# @param images [Array<String>] image URLs accumulated so far (not mutated)
# @return [Array] two values: accumulated HTML and image URLs
def self.phandle(element, html, images)
  descend = !is_blank?(element.children) && !(element.name == "a" && is_blank?(element.search("img")))
  if descend
    element.children.each do |child|
      html, images = phandle(child, html, images)
    end
  end

  case element.name
  when "img"
    img = get_valid_image(element)
    unless is_blank?(img)
      # Append rather than overwrite, so text that precedes the image
      # inside the same paragraph is kept.
      html = html + "</p><figure><img src=\"#{img}\"></figure><p>"
      images = images + [img]
    end
  when "a"
    href = element.attribute("href")
    html = html + " <a href=\"#{href.value unless is_blank?(href)}\">#{element.text.split.join(" ")}</a> "
  when "text"
    html = html + element.text.split.join(" ")
  when "br"
    html = html + "<br>"
  when "p"
    html = element.text.split.join(" ") if is_blank?(html)
  end
  return html, images
end
307
+
308
# Collects the <img> tags found anywhere below a figure element.
#
# A node counts as an image when it is an <img> or when its class attribute
# contains "js-delayed-image-load". Image nodes are not descended into.
#
# @param element [Nokogiri::XML::Node] current node
# @param html [String] markup accumulated so far
# @param images [Array<String>] image URLs accumulated so far; appended to
#   in place
# @return [Array] two values: accumulated markup and image URLs
def self.figurehandle(element, html, images)
  css_class = element.attribute("class")
  delayed = !is_blank?(css_class) && css_class.value.include?("js-delayed-image-load")

  if element.name == "img" || delayed
    src = get_valid_image(element)
    return html, images if is_blank?(src)
    images << src
    return html + "<img src=\"#{src}\">", images
  end

  unless is_blank?(element.children)
    element.children.each do |child|
      html, images = figurehandle(child, html, images)
    end
  end
  return html, images
end
324
+
325
# Renders a list element as <ul>/<ol> markup.
#
# Every <li> found by +search+ is rendered through recursively_clean with a
# fixed score of 0.95; items that render to nothing are dropped. For
# elements other than ul/ol the bare <li> items are returned unwrapped.
#
# @param element [Nokogiri::XML::Node] list element
# @return [Array] two values: the list HTML ("" when no item rendered) and
#   the image URLs found inside the items
def self.listhandle(element)
  items = ""
  found = []
  element.search("li").each do |item|
    item_html, item_images = recursively_clean(item, "", [], 0.95)
    items += "<li>#{item_html}</li>" unless is_blank?(item_html)
    found += item_images unless is_blank?(item_images)
  end

  tag = element.name
  if ["ul", "ol"].include?(tag) && !is_blank?(items)
    items = "<#{tag}>#{items}</#{tag}>"
  end
  return items, found
end
341
+
342
# Heuristic article lookup for pages without a usable <article> element.
#
# Three XPath queries are tried in order; the first result with at least
# 450 characters of whitespace-normalised text wins. When none qualifies,
# the result of the last query is returned as is.
#
# @param doc [Nokogiri::HTML::Document] parsed page
# @return [Nokogiri::XML::NodeSet] matching nodes (possibly empty)
def self.find_article(doc)
  queries = [
    '//*[@*="articleBody"]',
    '//*[contains(@class, "article")]',
    '//*[contains(@class, "body")]'
  ]
  article = nil
  queries.each do |query|
    article = doc.xpath(query)
    too_short = is_blank?(article) || article.text.split.join(" ").length < 450
    break unless too_short
  end
  return article
end
348
+
349
# Picks the first candidate whose whitespace-normalised text is longer than
# 200 characters.
#
# @param articles [Enumerable] candidate nodes responding to +text+
# @return [Object, nil] the first long-enough candidate, or nil
def self.get_correct_article(articles)
  articles.find { |candidate| candidate.text.split.join(" ").length > 200 }
end
357
+
358
# Strips non-content nodes (scripts, comments, asides, iframes, forms) and
# nodes matched by the comment/social/advertisement/share attribute queries.
#
# NOTE(review): queries starting with "//" look like they are evaluated
# from the document root rather than from +article+ -- confirm the scope.
#
# @param article [Nokogiri::XML::Node, Nokogiri::XML::NodeSet] article root
# @return the same +article+ object, after the removals
def self.remove_unwanted_items_from(article)
  ["//script", "//comment()", "//aside", ".aside", "iframe", "//noscript", "//form"].each do |selector|
    article.search(selector.to_s).remove
  end
  ["comment", "social", "advertisement", "share"].each do |keyword|
    article.xpath("//*[contains(@*, '#{keyword}')]").remove
  end
  article
end
369
+
370
# Determines the dominant colour of an image by reducing it to a single
# colour with ImageMagick's `convert` and reading the histogram output.
#
# @param url [String] location of the image, handed to MiniMagick::Image.open
# @return [String, nil] first seven characters of the last "#"-prefixed
#   token (e.g. "#FF8800"), or nil when the output contains no such token
def self.get_dominant_color(url)
  image = MiniMagick::Image.open(url)
  tokens = image.run_command("convert", image.path, "-format", "%c\n", "-colors", 1, "-depth", 8, "histogram:info:").split(' ')
  # Same token the original Hash-based lookup selected: the last one that
  # starts with "#".
  index = tokens.rindex { |token| token =~ /^#/ }
  return nil if index.nil?
  tokens[index][0..6]
end
375
+
376
# Tells whether +value+ carries no content.
#
# nil is blank. Nokogiri elements and attributes are asked via +blank?+.
# Anything that responds to +empty?+ (strings, arrays, node sets) is asked
# via +empty?+. Other objects are treated as not blank instead of raising
# NoMethodError.
#
# @param value [Object, nil] value to test
# @return [Boolean]
def self.is_blank?(value)
  return true if value.nil?

  # Exact class match, as before.
  if value.instance_of?(Nokogiri::XML::Element) || value.instance_of?(Nokogiri::XML::Attr)
    value.blank?
  elsif value.respond_to?(:empty?)
    value.empty?
  elsif value.respond_to?(:blank?)
    value.blank?
  else
    false
  end
end
383
+
384
+ end
@@ -0,0 +1,3 @@
1
module Artext
  # Version of the gem, read by artext.gemspec. Frozen so the constant
  # cannot be mutated at runtime.
  VERSION = "0.0.1".freeze
end
metadata ADDED
@@ -0,0 +1,164 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: artext
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Anindya Mondal
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-09-12 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.6'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.6'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ~>
46
+ - !ruby/object:Gem::Version
47
+ version: '3.3'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: '3.3'
55
+ - !ruby/object:Gem::Dependency
56
+ name: addressable
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: '2.3'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ~>
67
+ - !ruby/object:Gem::Version
68
+ version: '2.3'
69
+ - !ruby/object:Gem::Dependency
70
+ name: httparty
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ~>
74
+ - !ruby/object:Gem::Version
75
+ version: '0.13'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ~>
81
+ - !ruby/object:Gem::Version
82
+ version: '0.13'
83
+ - !ruby/object:Gem::Dependency
84
+ name: fastimage
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ~>
88
+ - !ruby/object:Gem::Version
89
+ version: '1.6'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ~>
95
+ - !ruby/object:Gem::Version
96
+ version: '1.6'
97
+ - !ruby/object:Gem::Dependency
98
+ name: mini_magick
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ~>
102
+ - !ruby/object:Gem::Version
103
+ version: '3.7'
104
+ type: :runtime
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ~>
109
+ - !ruby/object:Gem::Version
110
+ version: '3.7'
111
+ - !ruby/object:Gem::Dependency
112
+ name: nokogiri
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ~>
116
+ - !ruby/object:Gem::Version
117
+ version: '1.6'
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ~>
123
+ - !ruby/object:Gem::Version
124
+ version: '1.6'
125
+ description: Extract article and other metadata from websites.
126
+ email:
127
+ - anindyamondal@mazdigital.com
128
+ executables: []
129
+ extensions: []
130
+ extra_rdoc_files: []
131
+ files:
132
+ - Gemfile
133
+ - Gemfile.lock
134
+ - LICENSE.txt
135
+ - README.md
136
+ - Rakefile
137
+ - artext.gemspec
138
+ - lib/artext.rb
139
+ - lib/artext/version.rb
140
+ homepage: https://github.com/Anindya91/artext
141
+ licenses:
142
+ - MIT
143
+ metadata: {}
144
+ post_install_message:
145
+ rdoc_options: []
146
+ require_paths:
147
+ - lib
148
+ required_ruby_version: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - '>='
151
+ - !ruby/object:Gem::Version
152
+ version: '0'
153
+ required_rubygems_version: !ruby/object:Gem::Requirement
154
+ requirements:
155
+ - - '>='
156
+ - !ruby/object:Gem::Version
157
+ version: '0'
158
+ requirements: []
159
+ rubyforge_project:
160
+ rubygems_version: 2.4.8
161
+ signing_key:
162
+ specification_version: 4
163
+ summary: Extract article from websites.
164
+ test_files: []