fly_parser 0.0.5 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 220a36cf7e2845300835a2f2aa9c931e01245402
4
- data.tar.gz: 7bbcbb1959f8d78d690223265ce95b0418f5116e
3
+ metadata.gz: a4b60890098babea5cdf0a19b2ab7f72207bce93
4
+ data.tar.gz: a5cf7fdba982c30f622f2c048ea5ef08d6a4565a
5
5
  SHA512:
6
- metadata.gz: 3e33cb8372a87e60b830c91343bbcd05705300644c073f5094adf8017686f8d59a874e2a10d8672795c07823c5eeeb1695c184c52ac8fe2e99081eb21a8f74d1
7
- data.tar.gz: f398560cf2541d717de50ba5158996a0bb5bbb53b19fc443ee2d0283b41417cb873200acb4fd83bbf2e71847f6535bb12eb225dee86129fb18d2a01ad3a8107f
6
+ metadata.gz: 4d9bfcfbc33a83acbf053cca55206f27377a1f6ab62ead285def08fc43ef659ea61aee533c5c743b21ee06b8d7c22e79b77942370ce76ace69270e14f359f5fd
7
+ data.tar.gz: f4b199bf649c121cbf1883e00acf451742dd25830076bc3b55a62258e149d1c314026e447580a3c8e2eab3d42b6648e5b9adeb763056aa3bec10322c62d6c288
@@ -0,0 +1,30 @@
1
+ module Parser
2
+ class NewsNl < XmlBase # parser used for the "news-nl" source type; turns each feed <item> into a title/content/poster_image hash
3
+ def initialize(source, options = {})
4
+ super # XmlBase#initialize resolves the source and sets @source / @copyright
5
+ end
6
+
7
+ def parse_all # returns the mapped items as hashes, with skipped items removed
8
+ items = @source.search('//item')
9
+ # last_date = Time.now - 2.years # for dev 2 years
10
+ # NOTE: select! / reject! do not exist on Nokogiri's NodeSet, so the (currently disabled) date filter below reassigns instead
11
+ # items = items.select {|item| item.xpath('pubDate').first.content() > last_date }
12
+ items.map do |item|
13
+ title = item.xpath('title/text()').text()
14
+ date = item.xpath('pubDate').first.content() # NOTE(review): assigned but never used below
15
+ link = item.xpath('link/text()').text()
16
+ page = Nokogiri::HTML(open(link)) # fetches the linked page for every item; NOTE(review): link comes from the feed, consider URI.open over Kernel#open
17
+
18
+ next if page.search('#article-image a img').first.nil?
19
+ poster_image = page.search('#article-image a img').first.attributes['src'].value
20
+ full_desc = item.xpath('description/text()')
21
+ # strip the href attribute from <a> tags in the description, keeping the link text (NOTE(review): (.*) is greedy, so several links on one line may be merged into one match)
22
+ full_desc = full_desc.text().gsub(/<a href="([a-zA-Z:\/\.\d\-]*)">(.*)<\/a>/,'<a>\2</a>')
23
+
24
+ copyright = "<p>Source: <a href='#{@copyright[:url]}'>#{@copyright[:title]}</a></p>"
25
+ content = full_desc + copyright # description followed by the source attribution paragraph
26
+ {title: title, content: content, poster_image: poster_image}
27
+ end.compact # drops the nil entries produced by the early `next` for items without an article image
28
+ end
29
+ end
30
+ end
@@ -1,28 +1,7 @@
1
1
  module Parser
2
- class News
2
+ class News < XmlBase
3
3
  def initialize(source, options = {})
4
- if options[:type] == :file
5
- source = fake_url(source)
6
- end
7
- @copyright = copyright(options)
8
- @source = Parser.connect(source)
9
- @delay ||= 10
10
- end
11
-
12
- def fake_url(source)
13
- stream = File.read(source)
14
- # test_file.com is a random url, just for Mechanize parsing
15
- url = "http://www.google.com"
16
- FakeWeb.register_uri(:get, url, :body => stream, :content_type => "application/xml")
17
- url
18
- end
19
-
20
- def copyright(options)
21
- source = options[:source]
22
- {
23
- url: source['copyright'],
24
- title: source['copyright_title']
25
- }
4
+ super
26
5
  end
27
6
 
28
7
  def parse_all
@@ -0,0 +1,28 @@
1
+ module Parser
2
+ class XmlBase # shared setup for the XML feed parsers (News and NewsNl inherit from it)
3
+ def initialize(source, options = {}) # source: URL, or a file path when options[:type] == :file; options[:source] supplies the copyright fields
4
+ if options[:type] == :file
5
+ source = fake_url(source) # local files are exposed through a FakeWeb-registered URL so they go through the same connect path
6
+ end
7
+ @copyright = copyright(options)
8
+ @source = Parser.connect(source)
9
+ @delay ||= 10 # default only if not already set; NOTE(review): unit and consumer of @delay are not visible here
10
+ end
11
+
12
+ def fake_url(source) # registers the file's contents with FakeWeb and returns the URL that now serves them
13
+ stream = File.read(source)
14
+ # arbitrary placeholder URL, just for Mechanize parsing; FakeWeb answers GET requests to it with the file body
15
+ url = "http://www.google.com"
16
+ FakeWeb.register_uri(:get, url, :body => stream, :content_type => "application/xml")
17
+ url
18
+ end
19
+
20
+ def copyright(options) # builds {url:, title:} from options[:source]; NOTE(review): a missing :source makes source['copyright'] raise NoMethodError on nil
21
+ source = options[:source]
22
+ {
23
+ url: source['copyright'],
24
+ title: source['copyright_title']
25
+ }
26
+ end
27
+ end
28
+ end
data/lib/fly_parser.rb CHANGED
@@ -5,9 +5,8 @@ require 'pry'
5
5
  require 'open-uri'
6
6
  require 'yaml'
7
7
  require 'mechanize'
8
- BASE_PATH = File.expand_path("fly_parser/base", File.dirname(__FILE__))
8
+ BASE_PATH = File.expand_path("fly_parser/*.rb", File.dirname(__FILE__))
9
9
  LOGO_PATH = File.expand_path("fly_parser/logo.txt", File.dirname(__FILE__))
10
- MECHANIZE_FIX = File.expand_path("fly_parser/mechanize_fix", File.dirname(__FILE__))
11
10
 
12
11
  Pry.config.print = proc { |output, value| output.puts value.ai }
13
12
 
@@ -24,17 +23,15 @@ def require_all(path)
24
23
  end
25
24
 
26
25
  unless defined? Rails
27
- require BASE_PATH
26
+ Dir[BASE_PATH].each do |base_file|
27
+ require base_file
28
+ end
28
29
  Dir.chdir RAILS_ROOT
29
30
  require RAILS_BOOT_PATH
30
31
  require RAILS_CONFIG_PATH
31
32
  require_all 'fly_parser/sources'
32
33
  end
33
34
 
34
- # fix mechanize by monkey-patching :)
35
- require MECHANIZE_FIX
36
-
37
-
38
35
  module Parser
39
36
  class << self
40
37
  # Get HTTP Source
@@ -114,6 +111,10 @@ module Parser
114
111
  item["parser"] = Parser::News.new(item["file"], {type: :file, source: source})
115
112
  end
116
113
  end
114
+ when "news-nl"
115
+ source["items"].each do |item|
116
+ item["parser"] = Parser::NewsNl.new(item["url"], source: source)
117
+ end
117
118
  end
118
119
  end
119
120
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fly_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ruslan Korolev
@@ -123,8 +123,10 @@ files:
123
123
  - lib/fly_parser/sources/astrology.rb
124
124
  - lib/fly_parser/sources/exercise.rb
125
125
  - lib/fly_parser/sources/fitness.rb
126
+ - lib/fly_parser/sources/news-nl.rb
126
127
  - lib/fly_parser/sources/news.rb
127
128
  - lib/fly_parser/sources/sport.rb
129
+ - lib/fly_parser/xml_base.rb
128
130
  homepage: http://rubygems.org
129
131
  licenses:
130
132
  - MIT