fly_parser 0.0.5 → 0.0.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 220a36cf7e2845300835a2f2aa9c931e01245402
4
- data.tar.gz: 7bbcbb1959f8d78d690223265ce95b0418f5116e
3
+ metadata.gz: a4b60890098babea5cdf0a19b2ab7f72207bce93
4
+ data.tar.gz: a5cf7fdba982c30f622f2c048ea5ef08d6a4565a
5
5
  SHA512:
6
- metadata.gz: 3e33cb8372a87e60b830c91343bbcd05705300644c073f5094adf8017686f8d59a874e2a10d8672795c07823c5eeeb1695c184c52ac8fe2e99081eb21a8f74d1
7
- data.tar.gz: f398560cf2541d717de50ba5158996a0bb5bbb53b19fc443ee2d0283b41417cb873200acb4fd83bbf2e71847f6535bb12eb225dee86129fb18d2a01ad3a8107f
6
+ metadata.gz: 4d9bfcfbc33a83acbf053cca55206f27377a1f6ab62ead285def08fc43ef659ea61aee533c5c743b21ee06b8d7c22e79b77942370ce76ace69270e14f359f5fd
7
+ data.tar.gz: f4b199bf649c121cbf1883e00acf451742dd25830076bc3b55a62258e149d1c314026e447580a3c8e2eab3d42b6648e5b9adeb763056aa3bec10322c62d6c288
@@ -0,0 +1,30 @@
1
+ module Parser
2
+ class NewsNl < XmlBase
3
+ def initialize(source, options = {})
4
+ super
5
+ end
6
+
7
+ def parse_all
8
+ items = @source.search('//item')
9
+ # last_date = Time.now - 2.years # for dev 2 years
10
+ # select! and reject! do not exist on Nokogiri::XML::NodeSet
11
+ # items = items.select {|item| item.xpath('pubDate').first.content() > last_date }
12
+ items.map do |item|
13
+ title = item.xpath('title/text()').text()
14
+ date = item.xpath('pubDate').first.content()
15
+ link = item.xpath('link/text()').text()
16
+ page = Nokogiri::HTML(open(link))
17
+
18
+ next if page.search('#article-image a img').first.nil?
19
+ poster_image = page.search('#article-image a img').first.attributes['src'].value
20
+ full_desc = item.xpath('description/text()')
21
+ # remove href attributes
22
+ full_desc = full_desc.text().gsub(/<a href="([a-zA-Z:\/\.\d\-]*)">(.*)<\/a>/,'<a>\2</a>')
23
+
24
+ copyright = "<p>Source: <a href='#{@copyright[:url]}'>#{@copyright[:title]}</a></p>"
25
+ content = full_desc + copyright
26
+ {title: title, content: content, poster_image: poster_image}
27
+ end.compact
28
+ end
29
+ end
30
+ end
@@ -1,28 +1,7 @@
1
1
  module Parser
2
- class News
2
+ class News < XmlBase
3
3
  def initialize(source, options = {})
4
- if options[:type] == :file
5
- source = fake_url(source)
6
- end
7
- @copyright = copyright(options)
8
- @source = Parser.connect(source)
9
- @delay ||= 10
10
- end
11
-
12
- def fake_url(source)
13
- stream = File.read(source)
14
- # the URL below is an arbitrary placeholder, registered only so Mechanize has something to fetch and parse
15
- url = "http://www.google.com"
16
- FakeWeb.register_uri(:get, url, :body => stream, :content_type => "application/xml")
17
- url
18
- end
19
-
20
- def copyright(options)
21
- source = options[:source]
22
- {
23
- url: source['copyright'],
24
- title: source['copyright_title']
25
- }
4
+ super
26
5
  end
27
6
 
28
7
  def parse_all
@@ -0,0 +1,28 @@
1
+ module Parser
2
+ class XmlBase
3
+ def initialize(source, options = {})
4
+ if options[:type] == :file
5
+ source = fake_url(source)
6
+ end
7
+ @copyright = copyright(options)
8
+ @source = Parser.connect(source)
9
+ @delay ||= 10
10
+ end
11
+
12
+ def fake_url(source)
13
+ stream = File.read(source)
14
+ # the URL below is an arbitrary placeholder, registered only so Mechanize has something to fetch and parse
15
+ url = "http://www.google.com"
16
+ FakeWeb.register_uri(:get, url, :body => stream, :content_type => "application/xml")
17
+ url
18
+ end
19
+
20
+ def copyright(options)
21
+ source = options[:source]
22
+ {
23
+ url: source['copyright'],
24
+ title: source['copyright_title']
25
+ }
26
+ end
27
+ end
28
+ end
data/lib/fly_parser.rb CHANGED
@@ -5,9 +5,8 @@ require 'pry'
5
5
  require 'open-uri'
6
6
  require 'yaml'
7
7
  require 'mechanize'
8
- BASE_PATH = File.expand_path("fly_parser/base", File.dirname(__FILE__))
8
+ BASE_PATH = File.expand_path("fly_parser/*.rb", File.dirname(__FILE__))
9
9
  LOGO_PATH = File.expand_path("fly_parser/logo.txt", File.dirname(__FILE__))
10
- MECHANIZE_FIX = File.expand_path("fly_parser/mechanize_fix", File.dirname(__FILE__))
11
10
 
12
11
  Pry.config.print = proc { |output, value| output.puts value.ai }
13
12
 
@@ -24,17 +23,15 @@ def require_all(path)
24
23
  end
25
24
 
26
25
  unless defined? Rails
27
- require BASE_PATH
26
+ Dir[BASE_PATH].each do |base_file|
27
+ require base_file
28
+ end
28
29
  Dir.chdir RAILS_ROOT
29
30
  require RAILS_BOOT_PATH
30
31
  require RAILS_CONFIG_PATH
31
32
  require_all 'fly_parser/sources'
32
33
  end
33
34
 
34
- # fix mechanize by monkey-patching :)
35
- require MECHANIZE_FIX
36
-
37
-
38
35
  module Parser
39
36
  class << self
40
37
  # Get HTTP Source
@@ -114,6 +111,10 @@ module Parser
114
111
  item["parser"] = Parser::News.new(item["file"], {type: :file, source: source})
115
112
  end
116
113
  end
114
+ when "news-nl"
115
+ source["items"].each do |item|
116
+ item["parser"] = Parser::NewsNl.new(item["url"], source: source)
117
+ end
117
118
  end
118
119
  end
119
120
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fly_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ruslan Korolev
@@ -123,8 +123,10 @@ files:
123
123
  - lib/fly_parser/sources/astrology.rb
124
124
  - lib/fly_parser/sources/exercise.rb
125
125
  - lib/fly_parser/sources/fitness.rb
126
+ - lib/fly_parser/sources/news-nl.rb
126
127
  - lib/fly_parser/sources/news.rb
127
128
  - lib/fly_parser/sources/sport.rb
129
+ - lib/fly_parser/xml_base.rb
128
130
  homepage: http://rubygems.org
129
131
  licenses:
130
132
  - MIT