artext 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: dab606c0c96c6939da80ddf9e5ae61ecb905d90f
4
+ data.tar.gz: 7df963e4a642b499c776b844e2da9901e58dfefb
5
+ SHA512:
6
+ metadata.gz: 13819faa7e432065a235b69ad2bcd326015716dff27e1bea9b8b7929e3c4b2d0a0ca77e2b16ee1f276714fe07bf066f14b563ad9c1d5e762859c414b7e7991dc
7
+ data.tar.gz: 8e8c98d7d52e137272181661a30921ef9d49af6cfd419cdbc9fb78c1b8f484bea879c93a2fac970c3a59d76bc1643df3a6f308b9436a480b05fa313c82605b1f
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
source "https://rubygems.org"

# Runtime and development dependencies are declared in artext.gemspec.
gemspec
@@ -0,0 +1,54 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ artext (0.0.1)
5
+ addressable (~> 2.3)
6
+ fastimage (~> 1.6)
7
+ httparty (~> 0.13)
8
+ mini_magick (~> 3.7)
9
+ nokogiri (~> 1.6)
10
+
11
+ GEM
12
+ remote: https://rubygems.org/
13
+ specs:
14
+ addressable (2.3.8)
15
+ diff-lcs (1.2.5)
16
+ fastimage (1.7.0)
17
+ addressable (~> 2.3, >= 2.3.5)
18
+ httparty (0.13.5)
19
+ json (~> 1.8)
20
+ multi_xml (>= 0.5.2)
21
+ json (1.8.3)
22
+ mini_magick (3.8.1)
23
+ subexec (~> 0.2.1)
24
+ mini_portile (0.6.2)
25
+ multi_xml (0.5.5)
26
+ nokogiri (1.6.6.2)
27
+ mini_portile (~> 0.6.0)
28
+ rake (10.4.2)
29
+ rspec (3.3.0)
30
+ rspec-core (~> 3.3.0)
31
+ rspec-expectations (~> 3.3.0)
32
+ rspec-mocks (~> 3.3.0)
33
+ rspec-core (3.3.2)
34
+ rspec-support (~> 3.3.0)
35
+ rspec-expectations (3.3.1)
36
+ diff-lcs (>= 1.2.0, < 2.0)
37
+ rspec-support (~> 3.3.0)
38
+ rspec-mocks (3.3.2)
39
+ diff-lcs (>= 1.2.0, < 2.0)
40
+ rspec-support (~> 3.3.0)
41
+ rspec-support (3.3.0)
42
+ subexec (0.2.3)
43
+
44
+ PLATFORMS
45
+ ruby
46
+
47
+ DEPENDENCIES
48
+ artext!
49
+ bundler (~> 1.6)
50
+ rake (~> 10.0)
51
+ rspec
52
+
53
+ BUNDLED WITH
54
+ 1.10.6
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2015 Anindya Mondal
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,31 @@
1
+ # Artext
2
+
3
+ Extract article and other metadata from websites.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'artext'
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install artext
20
+
21
+ ## Usage
22
+
23
+ Call `Artext.extract(url)`; it returns a hash with `:url`, `:data` and `:article` keys.
24
+
25
+ ## Contributing
26
+
27
+ 1. Fork it ( https://github.com/[my-github-username]/artext/fork )
28
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
29
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
30
+ 4. Push to the branch (`git push origin my-new-feature`)
31
+ 5. Create a new Pull Request
@@ -0,0 +1,9 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
# Registers the RSpec rake task under its default name.
RSpec::Core::RakeTask.new

# Both `rake` and `rake test` run the spec task.
task default: :spec
task test: :spec
8
+
9
+
@@ -0,0 +1,31 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'artext/version'
5
+
6
Gem::Specification.new do |gem|
  # Identity and descriptive metadata.
  gem.name        = "artext"
  gem.version     = Artext::VERSION
  gem.authors     = ["Anindya Mondal"]
  gem.email       = ["anindyamondal@mazdigital.com"]
  gem.summary     = "Extract article from websites."
  gem.description = "Extract article and other metadata from websites."
  gem.homepage    = "https://github.com/Anindya91/artext"
  gem.license     = "MIT"

  # Package every file tracked by git; executables and test files are
  # derived from that list by path prefix.
  gem.files         = `git ls-files -z`.split("\x0")
  gem.executables   = gem.files.grep(%r{^bin/}) { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  # Needed only to build and test the gem.
  [
    ["bundler", "~> 1.6"],
    ["rake", "~> 10.0"],
    ["rspec", "~> 3.3"]
  ].each { |name, requirement| gem.add_development_dependency name, requirement }

  # Needed at runtime by lib/artext.rb.
  [
    ["addressable", "~> 2.3"],
    ["httparty", "~> 0.13"],
    ["fastimage", "~> 1.6"],
    ["mini_magick", "~> 3.7"],
    ["nokogiri", "~> 1.6"]
  ].each { |name, requirement| gem.add_dependency name, requirement }
end
@@ -0,0 +1,384 @@
1
+ require "artext/version"
2
+ require "open-uri"
3
+ require "httparty"
4
+ require "nokogiri"
5
+ require "mini_magick"
6
+ require "fastimage"
7
+
8
+ module Artext
9
+
10
# User-Agent header value sent by extract when it downloads a page.
# Frozen so the shared constant cannot be mutated by accident.
USER_AGENT = "facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)".freeze
11
+
12
# Downloads +url+ and extracts page metadata plus the main article content.
#
# A URL without an http(s) scheme gets "http://" prepended. When the result
# is not a well-formed http(s) address nothing is fetched.
#
# @param url [String] page address, with or without a scheme
# @return [Hash] +:url+ (the normalized URL), +:data+ and +:article+; the
#   latter two are empty arrays for an invalid URL, otherwise one-element
#   arrays holding the results of get_data_from_url and get_article_from_url
def self.extract(url)
  url = url.to_s.strip
  url = "http://#{url}" unless url =~ /\Ahttps?:\/\//i

  # \A and \z anchor the whole string. With ^ and $ a multi-line value such
  # as "|cmd\nhttp://host.com" passed validation because one of its lines
  # looked like a URL.
  well_formed = /\Ahttps?:\/\/[a-z0-9]+([\-\.]{1}[a-z0-9]+)*\.[a-z]{2,5}(:[0-9]{1,5})?(\/.*)?\z/ix
  return {:url => url, :data => [], :article => []} unless url =~ well_formed

  begin
    res = HTTParty.get(url, headers: {"User-Agent" => USER_AGENT})
    raise "Unable to crawl URL" if res.code != 200
    doc = Nokogiri::HTML(res.body)
  rescue StandardError
    # Second attempt through open-uri. OpenURI.open_uri only ever opens a
    # URI, unlike Kernel#open which spawns a process for "|..." input.
    doc = Nokogiri::HTML(OpenURI.open_uri(url, "User-Agent" => USER_AGENT))
  end

  data = get_data_from_url(doc, url)
  article = get_article_from_url(doc, url, data[:type])
  # A score of exactly 1 is what get_article_from_url returns for a bare image URL.
  data[:type] = "image" if article[:score] == 1
  {:url => url, :data => [data], :article => [article]}
end
27
+
28
# Collects page-level metadata from the parsed document.
#
# @param doc [Nokogiri::HTML::Document] parsed page
# @param url [String] address the page was fetched from; used to absolutize
#   og:image paths and to build the favicon URL
# @return [Hash] +:image+ (result of get_best_image over the og:image values),
#   +:title+ (og:title, else the <title> text, else nil), +:tags+ (Array of
#   unique, stripped keyword/tag strings, possibly empty), +:type+ (og:type
#   or nil), +:favicon+ (Google favicon service URL) and +:theme+ (result of
#   get_dominant_color for that favicon)
def self.get_data_from_url(doc, url)
  og_images = []
  doc.search("//meta[@property='og:image' or @name='og:image']").each do |meta|
    image = meta["content"]
    next if is_blank?(image)
    if image.start_with?("//")
      # Protocol-relative: borrow the scheme of the page itself.
      image = "#{URI.parse(url).scheme}:#{image}"
    elsif image.start_with?("/")
      # Host-relative: prefix scheme and host of the page.
      uri = URI.parse(url)
      image = File.join("#{uri.scheme}://#{uri.host}", image)
    end
    og_images << image
  end
  # Try to get the best image based on heuristics
  image = get_best_image(og_images)

  clip_title = nil
  og_title = doc.search("//meta[@property='og:title' or @name='og:title']")
  if !is_blank?(og_title) && !is_blank?(og_title[0]["content"])
    clip_title = og_title[0]["content"]
  else
    page_title = doc.search("//title")[0]
    clip_title = page_title.text unless is_blank?(page_title)
  end

  possible_tags = doc.xpath('//meta[contains(@name, "tag") or contains(@name, "keyword") or contains(@property, "tag") or contains(@property, "keyword")]')
  # uniq (not uniq!): the bang form returns nil when there is nothing to
  # remove, which turned a duplicate-free tag list into nil.
  tags = possible_tags.map { |meta| meta["content"] }
                      .compact
                      .flat_map { |content| content.split(',') }
                      .map(&:strip)
                      .uniq

  type = doc.search("//meta[@property='og:type' or @name='og:type']")
  type = is_blank?(type) ? nil : type[0]["content"]

  favicon = "http://www.google.com/s2/favicons?domain_url=#{url}"
  theme = get_dominant_color(favicon)

  {:image => image, :title => clip_title, :tags => tags, :type => type, :favicon => favicon, :theme => theme}
end
70
+
71
# Reads author names and a publication date from the document.
#
# Authors come from <meta> tags whose property/name is "author". The date is
# the content of the first <meta> tag whose property/name contains "date" and
# whose content starts with a year after 2000; failing that, the first
# +datetime+ attribute anywhere in the document that starts with such a year.
#
# @param doc [Nokogiri::HTML::Document] parsed page
# @return [Hash] +:date+ is nil when none was found, "Month D, YYYY" when the
#   value starts with a valid YYYY-MM-DD date, otherwise the
#   whitespace-normalized raw value; +:authors+ is an Array of Strings
def self.get_author_and_date(doc)
  authors = doc.xpath('//meta[@property="author" or @name="author"]')
               .map { |meta| meta["content"] }
               .reject { |name| is_blank?(name) }

  date = nil
  doc.xpath('//meta[contains(@property, "date") or contains(@name, "date")]').each do |meta|
    content = meta["content"]
    # A date-like meta tag without a content attribute is skipped instead of
    # raising NoMethodError on nil.
    next if is_blank?(content)
    if content[0..3].to_i > 2000
      date = content
      break
    end
  end

  if date.nil?
    # Same "year after 2000" rule as for meta tags, rather than matching the
    # literal text "2015".
    stamped = doc.xpath('//*[@datetime]').find { |node| node["datetime"].to_s[0..3].to_i > 2000 }
    date = stamped["datetime"] unless stamped.nil?
  end

  unless date.nil?
    normalized = date.split.join(" ")
    parts = normalized.match(/\A(\d{4})-(\d{1,2})-(\d{1,2})/)
    year, month, day = parts ? parts.captures.map(&:to_i) : []
    if parts && Date.valid_date?(year, month, day)
      date = "#{Date::MONTHNAMES[month]} #{day}, #{year}"
    else
      # Not a recognizable ISO date: hand back the text rather than raising.
      date = normalized
    end
  end
  return {:date => date, :authors => authors}
end
97
+
98
# Chooses one image URL out of several candidates.
#
# A single candidate is returned as is. Otherwise URLs matching
# "logo"/"fallback" are set aside (unless that leaves nothing), a GIF of at
# least 300x200 wins immediately, and failing that the widest image whose
# size FastImage could read is returned.
#
# @param images [Array<String>, nil] candidate image URLs
# @return [String, nil] chosen URL, or nil when there is no usable candidate
def self.get_best_image(images)
  return nil if is_blank?(images)
  return images[0] if images.size == 1

  # reject logo or similar images
  candidates = images.reject { |src| src =~ /logo|fallback/i }
  return candidates[0] if candidates.size == 1
  candidates = images if is_blank?(candidates)

  measured = []
  candidates.each do |src|
    kind = FastImage.type(Addressable::URI.escape(src))
    dims = FastImage.size(Addressable::URI.escape(src))
    return src if kind == :gif && dims && dims[0] > 299 && dims[1] > 199
    measured << {:x => dims[0], :y => dims[1], :image => src} unless dims.nil?
  end

  return nil if is_blank?(measured)
  measured.max_by { |entry| entry[:x] }[:image]
end
115
+
116
# Locates the main article in the document and renders it as simplified HTML.
#
# Starting score: 0.9 for a single <article> element with at least 500
# characters of text, 0.8 when one is picked out of several <article>
# elements, 0.6 when the find_article heuristics are used because no usable
# <article> exists. 0.5 is subtracted when +type+ is not "article", and
# find_relevant may adjust the score afterwards.
#
# @param doc [Nokogiri::HTML::Document] parsed page
# @param url [String] address of the page; used as the image source when the
#   document serializes to nothing
# @param type [String, nil] og:type of the page
# @return [Hash] +:body+, +:text+, +:images+, +:score+, plus +:date+ and
#   +:authors+ when an article was found. Score 0 means nothing was found;
#   score 1 means the URL is treated as a bare image.
def self.get_article_from_url(doc, url, type)
  dates = get_author_and_date(doc)
  article = doc.search("//article")
  score = 0.9
  article = [] if article.count == 1 && article.text.split.join(" ").length < 500
  if article.count > 1
    article = get_correct_article(article)
    score = 0.8
  end
  # Fall back to the heuristics only when no usable <article> was found. The
  # condition used to be inverted: found articles were thrown away and the
  # fallback never ran for pages without one.
  if is_blank?(article)
    article = find_article(doc)
    score = 0.6
  end
  score -= 0.5 if type != "article"

  if is_blank?(article)
    # image url
    html = begin
      doc.to_html
    rescue StandardError
      nil
    end
    if is_blank?(html)
      return {:body => "<figure><img src=\"#{url}\"></figure>", :text => "", :images => [url], :score => 1}
    end
    return {:body => "", :text => "", :images => [], :score => 0}
  end

  article = remove_unwanted_items_from(article)
  article, score = find_relevant(article, score)
  html, imgs =
    if score > 0.9
      iteratively_clean(article, "", [], score)
    else
      recursively_clean(article, "", [], score)
    end
  {:body => html, :text => article.text.split.join(" "), :images => imgs, :score => score}.merge(dates)
end
150
+
151
# Walks upwards from the <p> elements inside +article+, looking for the
# ancestor that contains the most <p> elements.
#
# @param article [Nokogiri node or node set] candidate article container
# @param score [Float] confidence score so far
# @return [Array] two values: the selected node (+article+ itself when it has
#   no <p> descendants or the walk selects nothing) and the score, which is
#   raised to 0.95 when the walk stops through its break condition and the
#   score was below 1
def self.find_relevant(article, score)
  paragraphs = article.search("p")
  paragraph_total = paragraphs.count
  best = article
  best_count = 0
  previous_best = 0
  return best, score if is_blank?(paragraphs)

  # Climb one level per pass until the first node of the current level has
  # the same text and class attribute as the article.
  until paragraphs[0].text == article.text && paragraphs[0].attribute("class") == article.attribute("class")
    parents = paragraphs.map { |node| node.parent }
    parents.each do |candidate|
      found = candidate.search("p").count
      if found > best_count
        best_count = found
        best = candidate
      end
    end

    stalled = previous_best > 5 && previous_best == best_count
    if stalled || best_count - paragraph_total < 2
      score = 0.95 if score < 1
      break
    end
    previous_best = best_count
    paragraphs = parents
  end
  return best, score
end
176
+
177
# Renders each direct child of +element+ through get_element_html and joins
# the results.
#
# @param element [Nokogiri node] container whose children are rendered
# @param html [String] ignored; output always starts from an empty string
# @param images [Array] ignored
# @param score [Float] forwarded to get_element_html
# @return [Array] two values: the concatenated HTML and the unique image URLs
def self.iteratively_clean(element, html, images, score)
  markup = ""
  collected = []
  element.children.each do |child|
    fragment, found = get_element_html(child, [], score)
    markup += fragment unless is_blank?(fragment)
    collected += found unless is_blank?(found)
  end
  collected.uniq!
  return markup, collected
end
188
+
189
# Depth-first walk over +element+ that renders leaf-like nodes with
# get_element_html and skips known boilerplate containers.
#
# @param element [Nokogiri node or node set] subtree to render
# @param html [String] HTML accumulated so far
# @param images [Array<String>] image URLs accumulated so far
# @param score [Float] confidence score; above 0.9 section/ul/ol subtrees are skipped
# @return [Array] two values: the extended HTML and image list
def self.recursively_clean(element, html, images, score)
  leaf_tags = ["p", "figure", "a", "h1", "h2", "h3", "h4", "text"]
  single_node = element.class != Nokogiri::XML::NodeSet

  # Childless nodes and the tags listed above are rendered directly.
  if is_blank?(element.children) || (single_node && leaf_tags.include?(element.name))
    fragment, found = get_element_html(element, [], score)
    html = html + fragment unless is_blank?(fragment)
    images = images + found
    images.uniq!
    return html, images
  end

  # Containers that are dropped together with everything below them:
  # header/footer, short divs whose class mentions "meta", and (for scores
  # above 0.9) section, ul and ol.
  skipped = single_node && (
    ["header", "footer"].include?(element.name) ||
    (element.name == "div" && !is_blank?(element.attribute("class")) && element.attribute("class").value.downcase.include?("meta") && element.text.split.join(" ").length < 300) ||
    (score > 0.9 && ["section", "ul", "ol"].include?(element.name))
  )
  unless skipped
    element.children.each do |child|
      html, images = recursively_clean(child, html, images, score)
    end
  end
  return html, images
end
215
+
216
# Renders a single node as simplified HTML.
#
# Handled node names: img, h1-h4 (all emitted as <h2>), p/div, figure, text,
# i, ol/ul. Anchors and every other node produce no markup.
#
# @param element [Nokogiri node] node to render
# @param images [Array<String>] image URLs collected so far (not mutated)
# @param score [Float] confidence score, forwarded to extractp
# @return [Array] two values: the HTML fragment (nil when nothing was
#   produced) and the image list extended with any images found
def self.get_element_html(element, images, score)
  tv = nil
  case element.name
  when "a"
    # Eat it
  when "img"
    img = get_valid_image(element)
    unless is_blank?(img)
      tv = "<figure><img src=\"#{img}\"></figure>"
      images = images + [img]
    end
  when "h1", "h2", "h3", "h4"
    heading = element.text.split.join(" ")
    tv = "<h2>#{heading}</h2>" unless is_blank?(heading)
  when "p", "div"
    p_elem, ti = extractp(element, score)
    tv = "<p>#{p_elem}</p>" unless is_blank?(p_elem)
    images = images + ti unless is_blank?(ti)
  when "figure"
    cap = element.search("figcaption").text.split.join(" ")
    cap = is_blank?(cap) ? "" : "<figcaption>#{cap}</figcaption>"
    tv, ti = figurehandle(element, "", [])
    tv = "<figure>#{tv}#{cap}</figure>" unless is_blank?(tv)
    images = images + ti
  when "text"
    tv = element.text.split.join(" ")
    # The attribute previously lacked "=" and produced <p class"inline">.
    tv = "<p class=\"inline\">#{tv}</p>" unless is_blank?(tv)
  when "i"
    tv = element.text.split.join(" ")
    tv = "<i>#{tv}</i>" unless is_blank?(tv)
  when "ol", "ul"
    tv, ti = listhandle(element)
    images = images + ti
  end
  return tv, images
end
249
+
250
# Picks the source URL of an image element and accepts it only when the
# image is large enough.
#
# Attributes are tried in the order data-image, data-original, srcset,
# data-src, datasrc, rel:bf_image_src, src; the first one with a non-empty
# value wins (for srcset, the URL of its first candidate). A leading "//"
# gets "http:" prepended.
#
# @param element [Nokogiri node] image-like element
# @return [String] the URL when FastImage reports a width or height above
#   100 pixels, otherwise ""
def self.get_valid_image(element)
  return "" if is_blank?(element)

  source = nil
  ["data-image", "data-original", "srcset", "data-src", "datasrc", "rel:bf_image_src", "src"].each do |name|
    attr = element.attribute(name)
    next if attr.nil?
    value = attr.value.to_s.strip
    # srcset is "url descriptor, url descriptor, ..."; keep the first URL.
    value = value.split(",").first.to_s.split(" ").first.to_s if name == "srcset"
    # An attribute that is present but empty no longer ends the search (and
    # an empty srcset no longer raises); the next attribute is tried.
    next if value.empty?
    source = value
    break
  end
  return "" if source.nil?

  source = "http:" + source if source[0..1] == "//"
  size = FastImage.size(Addressable::URI.escape(source))
  return source if !size.nil? && (size[0] > 100 || size[1] > 100)
  ""
end
272
+
273
# Renders the inner content of a paragraph-like element via phandle.
#
# @param element [Nokogiri node] <p> or <div> element
# @param score [Float] confidence score; below 0.8, elements whose whole
#   text comes from their anchors are dropped
# @return [nil, Array] nil for a dropped link-only element, otherwise two
#   values: the inner HTML and the image URLs found
def self.extractp(element, score)
  anchors = element.search("a")
  link_only = !is_blank?(anchors) && element.text == anchors.text
  return nil if link_only && score < 0.8

  return nil, [] if is_blank?(element)
  inner, pictures = phandle(element, "", [])
  return inner, pictures
end
283
+
284
# Recursively flattens the content of a paragraph into inline HTML.
#
# Children are processed before the node itself, except below anchors that
# contain no image. Text is whitespace-normalized, anchors become
# <a href>, <br> is kept, and an accepted image closes the paragraph, emits a
# <figure> and reopens the paragraph.
#
# @param element [Nokogiri node] node being flattened
# @param html [String] HTML accumulated so far
# @param images [Array<String>] image URLs accumulated so far (not mutated)
# @return [Array] two values: the extended HTML and image list
def self.phandle(element, html, images)
  descend = !is_blank?(element.children) && !(element.name == "a" && is_blank?(element.search("img")))
  if descend
    element.children.each do |child|
      html, images = phandle(child, html, images)
    end
  end

  case element.name
  when "img"
    img = get_valid_image(element)
    unless is_blank?(img)
      # Append: assigning here used to discard all text that preceded the
      # image inside the same paragraph.
      html = html + "</p><figure><img src=\"#{img}\"></figure><p>"
      images = images + [img]
    end
  when "a"
    href = element.attribute("href")
    target = is_blank?(href) ? "" : href.value
    html = html + " <a href=\"#{target}\">#{element.text.split.join(" ")}</a> "
  when "text"
    html = html + element.text.split.join(" ")
  when "br"
    html = html + "<br>"
  when "p"
    # Nothing was produced by the children: fall back to the plain text.
    html = element.text.split.join(" ") if is_blank?(html)
  end
  return html, images
end
307
+
308
# Collects <img> tags for every accepted image found below a <figure>.
#
# A node counts as an image when it is an <img> or its class attribute
# contains "js-delayed-image-load"; other nodes are searched recursively.
#
# @param element [Nokogiri node] node being inspected
# @param html [String] HTML accumulated so far
# @param images [Array<String>] image URLs accumulated so far; accepted URLs
#   are appended to this array
# @return [Array] two values: the extended HTML and the image list
def self.figurehandle(element, html, images)
  klass = element.name == "img" ? nil : element.attribute("class")
  image_like = element.name == "img" || (!is_blank?(klass) && klass.value.include?("js-delayed-image-load"))

  if image_like
    src = get_valid_image(element)
    unless is_blank?(src)
      html = html + "<img src=\"#{src}\">"
      images << src
    end
  elsif !is_blank?(element.children)
    element.children.each do |child|
      html, images = figurehandle(child, html, images)
    end
  end
  return html, images
end
324
+
325
# Renders an <ol>/<ul> element as a cleaned list.
#
# Each <li> found below +element+ is rendered with recursively_clean at a
# fixed score of 0.95; items that render to nothing are left out.
#
# @param element [Nokogiri node] list element
# @return [Array] two values: the list HTML ("" when no item produced
#   output) and the image URLs found inside the items
def self.listhandle(element)
  items = ""
  pictures = []
  element.search("li").each do |li|
    inner, found = recursively_clean(li, "", [], 0.95)
    items = items + "<li>#{inner}</li>" unless is_blank?(inner)
    pictures = pictures + found unless is_blank?(found)
  end

  unless is_blank?(items)
    case element.name
    when "ul" then items = "<ul>#{items}</ul>"
    when "ol" then items = "<ol>#{items}</ol>"
    end
  end
  return items, pictures
end
341
+
342
# Heuristic search for the article container when <article> is not usable.
#
# Tries, in order: any element with an attribute equal to "articleBody", any
# element whose class contains "article", any element whose class contains
# "body". A later query replaces the current result only while that result is
# empty or holds fewer than 450 characters of text.
#
# @param doc [Nokogiri::HTML::Document] parsed page
# @return [Nokogiri::XML::NodeSet] result of the last query that ran
def self.find_article(doc)
  too_thin = lambda do |nodes|
    is_blank?(nodes) || nodes.text.split.join(" ").length < 450
  end

  found = doc.xpath('//*[@*="articleBody"]')
  found = doc.xpath('//*[contains(@class, "article")]') if too_thin.call(found)
  found = doc.xpath('//*[contains(@class, "body")]') if too_thin.call(found)
  found
end
348
+
349
# Returns the first candidate whose whitespace-normalized text is longer
# than 200 characters.
#
# @param articles [Enumerable] candidates responding to +text+
# @return [Object, nil] the first sufficiently long candidate, or nil
def self.get_correct_article(articles)
  articles.find { |candidate| candidate.text.split.join(" ").length > 200 }
end
357
+
358
# Strips non-content nodes before the article is rendered.
#
# Removes whatever the fixed selectors below match (scripts, comments,
# asides, iframes, noscript, forms), then whatever the contains(@*, ...)
# queries match for "comment", "social", "advertisement" and "share".
#
# @param article [Nokogiri node or node set] article container
# @return the same +article+ object
def self.remove_unwanted_items_from(article)
  ["//script", "//comment()", "//aside", ".aside", "iframe", "//noscript", "//form"].each do |selector|
    article.search("#{selector}").remove
  end
  ["comment", "social", "advertisement", "share"].each do |keyword|
    article.xpath("//*[contains(@*, '#{keyword}')]").remove
  end
  article
end
369
+
370
# Reduces the image at +url+ to a single color with ImageMagick's histogram
# output and returns that color as a hex string.
#
# @param url [String] location of the image, opened with MiniMagick
# @return [String, nil] the first seven characters ("#RRGGBB") of the last
#   "#"-prefixed token in the histogram output, or nil when the output
#   contains no such token
def self.get_dominant_color(url)
  image = MiniMagick::Image.open(url)
  tokens = image.run_command("convert", image.path, "-format", "%c\n", "-colors", 1, "-depth", 8, "histogram:info:").split(' ')
  # The last hex token is the one the former Hash-based lookup selected; a
  # missing token now yields nil instead of a TypeError.
  hex = tokens.reverse.find { |token| token.start_with?("#") }
  hex && hex[0..6]
end
375
+
376
# Nil-safe emptiness check used throughout the module.
#
# @param value [Object, nil] value to test
# @return [Boolean] true for nil; the result of +blank?+ for Nokogiri
#   elements and attributes; the result of +empty?+ for anything that
#   responds to it (strings, arrays, node sets); false for every other
#   object, which previously raised NoMethodError
def self.is_blank?(value)
  return true if value.nil?

  if value.class == Nokogiri::XML::Element || value.class == Nokogiri::XML::Attr
    value.blank?
  elsif value.respond_to?(:empty?)
    value.empty?
  else
    false
  end
end
383
+
384
+ end
@@ -0,0 +1,3 @@
1
module Artext
  # Version of the gem; artext.gemspec reads it as Artext::VERSION.
  # Frozen so the constant cannot be modified in place.
  VERSION = "0.0.1".freeze
end
metadata ADDED
@@ -0,0 +1,164 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: artext
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Anindya Mondal
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-09-12 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.6'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.6'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ~>
46
+ - !ruby/object:Gem::Version
47
+ version: '3.3'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: '3.3'
55
+ - !ruby/object:Gem::Dependency
56
+ name: addressable
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: '2.3'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ~>
67
+ - !ruby/object:Gem::Version
68
+ version: '2.3'
69
+ - !ruby/object:Gem::Dependency
70
+ name: httparty
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ~>
74
+ - !ruby/object:Gem::Version
75
+ version: '0.13'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ~>
81
+ - !ruby/object:Gem::Version
82
+ version: '0.13'
83
+ - !ruby/object:Gem::Dependency
84
+ name: fastimage
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ~>
88
+ - !ruby/object:Gem::Version
89
+ version: '1.6'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ~>
95
+ - !ruby/object:Gem::Version
96
+ version: '1.6'
97
+ - !ruby/object:Gem::Dependency
98
+ name: mini_magick
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ~>
102
+ - !ruby/object:Gem::Version
103
+ version: '3.7'
104
+ type: :runtime
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ~>
109
+ - !ruby/object:Gem::Version
110
+ version: '3.7'
111
+ - !ruby/object:Gem::Dependency
112
+ name: nokogiri
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ~>
116
+ - !ruby/object:Gem::Version
117
+ version: '1.6'
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ~>
123
+ - !ruby/object:Gem::Version
124
+ version: '1.6'
125
+ description: Extract article and other metadata from websites.
126
+ email:
127
+ - anindyamondal@mazdigital.com
128
+ executables: []
129
+ extensions: []
130
+ extra_rdoc_files: []
131
+ files:
132
+ - Gemfile
133
+ - Gemfile.lock
134
+ - LICENSE.txt
135
+ - README.md
136
+ - Rakefile
137
+ - artext.gemspec
138
+ - lib/artext.rb
139
+ - lib/artext/version.rb
140
+ homepage: https://github.com/Anindya91/artext
141
+ licenses:
142
+ - MIT
143
+ metadata: {}
144
+ post_install_message:
145
+ rdoc_options: []
146
+ require_paths:
147
+ - lib
148
+ required_ruby_version: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - '>='
151
+ - !ruby/object:Gem::Version
152
+ version: '0'
153
+ required_rubygems_version: !ruby/object:Gem::Requirement
154
+ requirements:
155
+ - - '>='
156
+ - !ruby/object:Gem::Version
157
+ version: '0'
158
+ requirements: []
159
+ rubyforge_project:
160
+ rubygems_version: 2.4.8
161
+ signing_key:
162
+ specification_version: 4
163
+ summary: Extract article from websites.
164
+ test_files: []