fly_parser 0.0.23 → 0.0.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/fly_parser.rb +0 -5
- data/lib/fly_parser/base.rb +9 -1
- data/lib/fly_parser/enable_source.rb +5 -1
- data/lib/fly_parser/sources/news-kz.rb +42 -0
- data/lib/fly_parser/version.rb +1 -1
- metadata +3 -2
- data/lib/fly_parser/sources/astrology.rb +0 -67
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d7390fe6d8dd04f001c9d1df1d4876e828585438
|
4
|
+
data.tar.gz: 4baee214d519608b5ebf3cf891b16aea7acd9e2d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 58133b0caf6bf9ec5b2714a4d29797c2156972f3701785dedf80a4eb9e484633db59a1807b07a9212b06ce687034d738b8b2cc86069ee3c1d3c432372cc82b7e
|
7
|
+
data.tar.gz: 216f6d754dd7ed4d8697885b2b1bf60b474d90763c5d7767daae999ce254384da889440d370c294947e572633d0accf9c697d74d0688eb606a80cf6995f1cbe0
|
data/lib/fly_parser.rb
CHANGED
data/lib/fly_parser/base.rb
CHANGED
@@ -27,6 +27,10 @@ module Enable
|
|
27
27
|
lambda {|item| item["parser"] = Parser::NewsAZ.new(item["url"], source: source)}
|
28
28
|
end
|
29
29
|
|
30
|
+
# Returns a lambda that attaches a Parser::NewsKZ instance to an item.
#
# source - value forwarded to the parser under the :source option.
#
# The returned lambda takes one item (indexed with string keys), reads
# item["url"], builds Parser::NewsKZ.new(url, source: source) and stores
# the new parser in item["parser"]. The lambda's value is that parser.
def news_kz(source)
  ->(item) do
    item["parser"] = Parser::NewsKZ.new(item["url"], source: source)
  end
end
|
33
|
+
|
30
34
|
def method_missing(meth, *args)
|
31
35
|
prefix = "enable_"
|
32
36
|
meth = meth.to_s
|
@@ -42,4 +46,4 @@ module Enable
|
|
42
46
|
# Passes every element of source["items"] to the given callable.
#
# source - object indexed with the string key "items"; that entry must
#          respond to #each.
# block  - callable converted to a block with the & operator.
#
# Returns whatever source["items"].each returns.
def iterate_sources(source, block)
  items = source["items"]
  items.each(&block)
end
|
45
|
-
end
|
49
|
+
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
module Parser
  # Article scraper built on Parser::Base (presumably for a Kazakhstan news
  # site, judging by the class name -- confirm against the source config).
  #
  # NOTE(review): @source, @copyright and #click are not defined here; they
  # are assumed to be provided by Base -- verify against base.rb.
  class NewsKZ < Base
    # Selectors of the nodes stripped from the article body, in removal order.
    REMOVED_SELECTORS = [
      '.wp-caption',
      '.c__article_mistake',
      'p[style="display:none"]',
      'a',
      'span:contains(Копирование)'
    ].freeze

    # Matches an empty <iframe ...></iframe> pair. The quantifier is lazy so
    # that two iframes on one line are removed separately instead of taking
    # the text between them along.
    EMPTY_IFRAME = /<iframe.*?><\/iframe>/

    # source  - forwarded unchanged to Base#initialize.
    # options - forwarded unchanged to Base#initialize.
    #
    # @delay is set to 2 before calling super (presumably a pause between
    # requests that Base honours -- TODO confirm the unit and its use).
    def initialize(source, options = {})
      @delay = 2
      super
    end

    # Follows the first link of every "li.c__news_item" entry and extracts
    # the article behind it.
    #
    # Returns an Array of Hashes with the keys :title, :content and
    # :poster_image. Articles without a '.wp-caption img' element, and
    # articles whose extraction raises a StandardError, are left out.
    def parse_all
      links = @source.search("li.c__news_item a:first")

      links.map do |link|
        # Kept outside the begin block on purpose: a failure while opening
        # the article still propagates to the caller, as it always did.
        page = click(link)
        begin
          title = page.search(".c__article_caption").text
          content_wrapper = page.search('.c__article_text')

          image = content_wrapper.search('.wp-caption img').first
          next unless image
          poster_image = image.attributes['src'].value

          REMOVED_SELECTORS.each { |selector| content_wrapper.search(selector).remove }

          full_desc = content_wrapper.to_html.gsub(EMPTY_IFRAME, '')
          copyright = "<p>Source: <a href='#{@copyright[:url]}'>#{@copyright[:title]}</a></p>"

          { title: title, content: full_desc + copyright, poster_image: poster_image }
        rescue StandardError => e
          # StandardError only: interrupts and exit requests must not be
          # swallowed by a best-effort scrape.
          puts e.message
          next
        end
      end.compact
    end
  end
end
|
data/lib/fly_parser/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fly_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.23
|
4
|
+
version: 0.0.24
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ruslan Korolev
|
@@ -135,11 +135,11 @@ files:
|
|
135
135
|
- lib/fly_parser/enable_source.rb
|
136
136
|
- lib/fly_parser/logo.txt
|
137
137
|
- lib/fly_parser/mechanize_fix.rb
|
138
|
-
- lib/fly_parser/sources/astrology.rb
|
139
138
|
- lib/fly_parser/sources/exercise.rb
|
140
139
|
- lib/fly_parser/sources/fitness.rb
|
141
140
|
- lib/fly_parser/sources/news-az.rb
|
142
141
|
- lib/fly_parser/sources/news-fr.rb
|
142
|
+
- lib/fly_parser/sources/news-kz.rb
|
143
143
|
- lib/fly_parser/sources/news-nl.rb
|
144
144
|
- lib/fly_parser/sources/news.rb
|
145
145
|
- lib/fly_parser/sources/sport.rb
|
@@ -170,3 +170,4 @@ signing_key:
|
|
170
170
|
specification_version: 4
|
171
171
|
summary: Fly parser
|
172
172
|
test_files: []
|
173
|
+
has_rdoc:
|
@@ -1,67 +0,0 @@
|
|
1
|
-
require 'hashie'
|
2
|
-
module Parser
  # Horoscope scraper: for each configured horoscope title it opens the
  # link with that text, then visits the page of every zodiac sign and
  # collects its content.
  #
  # NOTE(review): Parser.connect and Parser.http are defined elsewhere; the
  # objects they return are assumed to expose link_with/search/css as used
  # below -- verify against their definitions.
  class Astrology

    # source - passed to Parser.connect; the result is kept as the entry page.
    def initialize(source)
      @zodiacs = %w[Овен Телец Близнецы Рак Лев Дева Весы Скорпион Стрелец Козерог Водолей Рыбы]
      @source = Parser.connect(source)

      @titles = Hashie::Mash.new
      @titles.small = ["Гороскоп на сегодня", "Гороскоп на завтра", "Гороскоп на неделю"]
      @titles.big = ["Гороскопы на месяц", "Гороскоп на 2014 год", "Гороскоп на 2014 год зеленой Лошади", "Гороскоп на сентябрь 2014"]
    end

    # Selects the horoscope link text and the page layout ('small' or
    # anything else for the "big" layout), then parses every zodiac sign.
    #
    # Returns an Array of Hashie::Mash objects with zodiac and content.
    def parse_in(text = "Гороскоп на сегодня", date = 'small')
      @text = text
      @date = date
      parse_content
    end

    # Fetches the page of each zodiac link and extracts its content with
    # the parser matching @date.
    def parse_content
      zodiac_links.map do |entry|
        @page = Parser.http(entry.link.value)
        body = @date == 'small' ? parse_small : parse_big

        Hashie::Mash.new.tap do |result|
          result.zodiac = entry.zodiac
          result.content = body
        end
      end
    end

    # Parses every configured title, the "small" titles first and the "big"
    # ones after them.
    #
    # Returns an Array of Hashes with the keys :title and :content.
    def parse_all
      titles_by_layout = { "small" => @titles.small, "big" => @titles.big }

      titles_by_layout.flat_map do |layout, titles|
        titles.map { |title| { title: title, content: parse_in(title, layout) } }
      end
    end

    private

    # Clicks the entry-page link whose text equals @text and returns the
    # resulting page. Called once per zodiac sign by #zodiac_links.
    def current_page
      @source.link_with(text: @text).click
    end

    # Builds one Mash per zodiac sign holding the href attribute of the
    # first anchor containing the sign's name, plus the sign itself.
    def zodiac_links
      @zodiacs.map do |sign|
        anchor = current_page.search("a:contains('#{sign}')")[0]

        Hashie::Mash.new.tap do |entry|
          entry.link = anchor.attributes["href"]
          entry.zodiac = sign
        end
      end
    end

    # Joins the children of '#main', dropping layout nodes: those with
    # class lp50/rp50/space, img/br/b/h1 elements and newline-only text.
    def parse_small
      skipped_classes = ['lp50', 'rp50', 'space']
      skipped_tags = ['img', 'br', 'b', 'h1']
      blank_texts = ["\n", "\n\n"]

      kept = @page.css('#main').children.reject do |el|
        css_class = el.attributes['class']
        (css_class != nil && skipped_classes.include?(css_class.value)) ||
          skipped_tags.include?(el.name) ||
          blank_texts.include?(el.text)
      end
      kept.join
    end

    # Returns the text of the '.lp50' nodes inside '#main'.
    def parse_big
      @page.css('#main .lp50').text
    end
  end
end
|