fly_parser 0.0.23 → 0.0.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 35c2945cf908943eea6512e24e7b39106cf9ee9b
4
- data.tar.gz: c8e48aa58849e9cc2696602f22a2b4a058fab8b3
3
+ metadata.gz: d7390fe6d8dd04f001c9d1df1d4876e828585438
4
+ data.tar.gz: 4baee214d519608b5ebf3cf891b16aea7acd9e2d
5
5
  SHA512:
6
- metadata.gz: 1bc47c495a712b29615ca23935f61011db8607bc48a2ebe4131da250f4fcada371dec4540a4dc5e43f93c394ca5b043787fcfa4bcbf336cf4c77f942cba18a79
7
- data.tar.gz: a62844515966e3af62cdad8ec7c50a57b68393def5f2f5049861cbc1bfd92b23f373712be63a8b786ef13b88d32b12260ca9242fad92518d4ab044d375284ba4
6
+ metadata.gz: 58133b0caf6bf9ec5b2714a4d29797c2156972f3701785dedf80a4eb9e484633db59a1807b07a9212b06ce687034d738b8b2cc86069ee3c1d3c432372cc82b7e
7
+ data.tar.gz: 216f6d754dd7ed4d8697885b2b1bf60b474d90763c5d7767daae999ce254384da889440d370c294947e572633d0accf9c697d74d0688eb606a80cf6995f1cbe0
@@ -116,8 +116,3 @@ module Parser
116
116
  end
117
117
 
118
118
  end
119
-
120
- # astrology
121
- # astro = Parser::Astrology.new('http://moj-znak-zodiaka.ru/')
122
- # astro.parse_in("Гороскоп на сентябрь 2014","big")
123
- # astro.parse_all
@@ -42,5 +42,13 @@ module Parser
42
42
  title: source['copyright_title']
43
43
  }
44
44
  end
45
+
46
# Follows +link+ with a freshly built Mechanize agent and returns whatever
# Mechanize#click returns for it.
#
# A new agent is created on every call, so nothing is shared between
# successive clicks made through this helper.
#
# @param link the link object handed to Mechanize#click
# @return the result of Mechanize#click
def click(link)
  browser = Mechanize.new.tap do |mech|
    # Ask Mechanize to tolerate bad chunked transfer encoding.
    mech.ignore_bad_chunking = true
    # Use Mechanize::Page as the fallback parser for responses.
    mech.pluggable_parser.default = Mechanize::Page
  end

  browser.click(link)
end
52
+
45
53
  end
46
- end
54
+ end
@@ -27,6 +27,10 @@ module Enable
27
27
  lambda {|item| item["parser"] = Parser::NewsAZ.new(item["url"], source: source)}
28
28
  end
29
29
 
30
# Builds the per-item initializer for the "news_kz" source.
#
# @param source the source description passed on to every parser as +source:+
# @return [Proc] a lambda taking one item; it stores a new Parser::NewsKZ
#   (built from the item's "url") under the item's "parser" key
def news_kz(source)
  ->(item) do
    item["parser"] = Parser::NewsKZ.new(item["url"], source: source)
  end
end
33
+
30
34
  def method_missing(meth, *args)
31
35
  prefix = "enable_"
32
36
  meth = meth.to_s
@@ -42,4 +46,4 @@ module Enable
42
46
  def iterate_sources(source, block)
43
47
  source["items"].each(&block)
44
48
  end
45
- end
49
+ end
@@ -0,0 +1,42 @@
1
module Parser
  # Article scraper for a news listing page whose entries are marked up as
  # +li.c__news_item+. Every linked article is opened and reduced to a title,
  # a cleaned HTML body (with a copyright footer appended) and a poster image.
  class NewsKZ < Base
    # Matches an <iframe ...></iframe> pair. The quantifier is non-greedy so
    # that, when several frames share a line, each one is removed on its own
    # instead of deleting all markup between the first and the last frame.
    IFRAME_TAG = %r{<iframe.*?></iframe>}.freeze

    # @param source forwarded unchanged to Base#initialize
    # @param options [Hash] forwarded unchanged to Base#initialize
    def initialize(source, options = {})
      # Assigned before +super+ so the value is already in place while
      # Base#initialize runs (presumably Base reads @delay — confirm).
      @delay = 2
      super
    end

    # Visits the first link of every listed news item and extracts its article.
    #
    # Articles without a +.wp-caption img+ poster are skipped. An article whose
    # extraction raises a StandardError is skipped too, after its message is
    # printed to stdout. Errors raised while following the link itself are not
    # rescued here.
    #
    # @return [Array<Hash>] hashes with +:title+, +:content+ and +:poster_image+
    def parse_all
      links = @source.search("li.c__news_item a:first")

      links.map do |link|
        page = click(link)
        begin
          article_from(page)
        rescue StandardError => e
          # StandardError (not Exception) so Interrupt/SystemExit still stop
          # the process instead of being swallowed as a failed article.
          puts e.message
          nil
        end
      end.compact
    end

    private

    # Extracts one article from an already loaded article page.
    #
    # @param page the page returned by +click+
    # @return [Hash, nil] the article data, or nil when the page has no poster image
    def article_from(page)
      title = page.search(".c__article_caption").text

      content_wrapper = page.search('.c__article_text')

      image_wrapper = content_wrapper.search('.wp-caption img').first
      return unless image_wrapper

      # Read the image URL before the caption block is removed below.
      poster_image = image_wrapper.attributes['src'].value

      # Strip everything that should not end up in the stored article body.
      content_wrapper.search('.wp-caption').remove
      content_wrapper.search('.c__article_mistake').remove
      content_wrapper.search('p[style="display:none"]').remove
      content_wrapper.search("a").remove
      content_wrapper.search("span:contains(Копирование)").remove

      full_desc = content_wrapper.to_html.gsub(IFRAME_TAG, '')
      copyright = "<p>Source: <a href='#{@copyright[:url]}'>#{@copyright[:title]}</a></p>"

      { title: title, content: full_desc + copyright, poster_image: poster_image }
    end
  end
end
@@ -1,3 +1,3 @@
1
1
  module Parser
2
- VERSION = "0.0.23"
2
+ VERSION = "0.0.24"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fly_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.23
4
+ version: 0.0.24
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ruslan Korolev
@@ -135,11 +135,11 @@ files:
135
135
  - lib/fly_parser/enable_source.rb
136
136
  - lib/fly_parser/logo.txt
137
137
  - lib/fly_parser/mechanize_fix.rb
138
- - lib/fly_parser/sources/astrology.rb
139
138
  - lib/fly_parser/sources/exercise.rb
140
139
  - lib/fly_parser/sources/fitness.rb
141
140
  - lib/fly_parser/sources/news-az.rb
142
141
  - lib/fly_parser/sources/news-fr.rb
142
+ - lib/fly_parser/sources/news-kz.rb
143
143
  - lib/fly_parser/sources/news-nl.rb
144
144
  - lib/fly_parser/sources/news.rb
145
145
  - lib/fly_parser/sources/sport.rb
@@ -170,3 +170,4 @@ signing_key:
170
170
  specification_version: 4
171
171
  summary: Fly parser
172
172
  test_files: []
173
+ has_rdoc:
@@ -1,67 +0,0 @@
1
- require 'hashie'
2
module Parser
  # Horoscope scraper. Starting from a landing page, it follows the link whose
  # text matches a horoscope title, then follows the link of every zodiac sign
  # on that page and extracts the forecast text.
  class Astrology
    # CSS classes of #main children that are dropped from "small" forecasts.
    SKIPPED_CLASSES = %w[lp50 rp50 space].freeze
    # Element names of #main children that are dropped from "small" forecasts.
    SKIPPED_TAGS = %w[img br b h1].freeze
    # Whitespace-only text values that are dropped from "small" forecasts.
    BLANK_TEXTS = ["\n", "\n\n"].freeze

    # @param source passed to Parser.connect; the result is kept as the landing page
    def initialize(source)
      # Zodiac sign names (Russian) used to locate the per-sign links by text.
      @zodiacs = ['Овен', 'Телец', 'Близнецы', 'Рак', 'Лев', 'Дева', 'Весы', 'Скорпион', 'Стрелец', 'Козерог', 'Водолей', 'Рыбы']
      @source = Parser.connect(source)

      # Link texts of the horoscope sections, grouped by the layout ("small"
      # or "big") that is used to parse them.
      @titles = Hashie::Mash.new
      @titles.small = ["Гороскоп на сегодня", "Гороскоп на завтра", "Гороскоп на неделю"]
      @titles.big = ["Гороскопы на месяц", "Гороскоп на 2014 год", "Гороскоп на 2014 год зеленой Лошади", "Гороскоп на сентябрь 2014"]
    end

    # Parses one horoscope section for every zodiac sign.
    #
    # @param text [String] link text of the section on the landing page
    # @param date [String] 'small' selects the small layout; any other value
    #   selects the big layout
    # @return [Array<Hashie::Mash>] one entry per sign with +zodiac+ and +content+
    def parse_in(text = "Гороскоп на сегодня", date = 'small')
      @text = text
      @date = date
      parse_content
    end

    # Fetches every zodiac page of the currently selected section (@text/@date)
    # and extracts its content.
    #
    # @return [Array<Hashie::Mash>] one entry per sign with +zodiac+ and +content+
    def parse_content
      zodiac_links.map do |item|
        @page = Parser.http(item.link.value)

        result = Hashie::Mash.new
        result.zodiac = item.zodiac
        result.content = @date == 'small' ? parse_small : parse_big
        result
      end
    end

    # Parses every known section, small-layout titles first, then big-layout ones.
    #
    # @return [Array<Hash>] hashes with +:title+ and +:content+ (the parse_in result)
    def parse_all
      section_contents(@titles.small, "small") + section_contents(@titles.big, "big")
    end

    private

    # Runs parse_in for each title using the given layout kind.
    def section_contents(titles, kind)
      titles.map { |title| { title: title, content: parse_in(title, kind) } }
    end

    # Follows the landing-page link whose text equals @text.
    def current_page
      @source.link_with(:text => @text).click
    end

    # Collects the href attribute of every zodiac sign link on the section page.
    def zodiac_links
      # Load the section page once: current_page clicks a link on every call,
      # so calling it inside the loop re-fetched the same page for each sign.
      section_page = current_page

      @zodiacs.map do |sign|
        result = Hashie::Mash.new
        result.link = section_page.search("a:contains('#{sign}')")[0].attributes["href"]
        result.zodiac = sign
        result
      end
    end

    # Small layout: joins the children of #main, leaving out layout blocks,
    # decorative tags and newline-only text.
    def parse_small
      @page.css('#main').children.reject do |el|
        css_class = el.attributes['class']

        (css_class && SKIPPED_CLASSES.include?(css_class.value)) ||
          SKIPPED_TAGS.include?(el.name) ||
          BLANK_TEXTS.include?(el.text)
      end.join
    end

    # Big layout: the text of the .lp50 block inside #main.
    def parse_big
      @page.css('#main .lp50').text
    end
  end
end