logstash-input-multirss 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cad07458c379cfcd9f3a7cbb9608f693a33efe5dfee0a2087387df491c135218
4
- data.tar.gz: a5956c590a9e0d9667b278de7f690d3a55ce9eaf886919f2b355c9e12fff8675
3
+ metadata.gz: d3e3215896069197c28eeaf2bc1d97b8a7b922b5f629d33f15c82b6cbe84c20a
4
+ data.tar.gz: d1391c39b3f2bf48888f93ca26633c821da10f439772408ac61de4ba9a561681
5
5
  SHA512:
6
- metadata.gz: 6611bda686c887dbaebba928718cd6521e4c2076e0c44feabdd1e13e89aef3aeb6a80cf8861dfba40b8b9a442f381262bc72023e6d3b3a14c86b94aadf3611ea
7
- data.tar.gz: d73bfa18ec57247ff7b566f94e2d6e6a9e710bd09b5b5563034bb9cc74c46ddc2bc5c9f8e13f268ceb259913195922388f3efe2f0c952689e88cee932cb6a453
6
+ metadata.gz: acbfa422e853ab42d439c960064f45df5277e372da471b20aa60f6a78d508fd872d9674fb009413fd0aee88b774d88eebe4e68db355c1ae6b7fee21f1d17d0c8
7
+ data.tar.gz: 0f1cf2179aa9722c65d610b6ea7c266866ad1719984cb89b280607c5fe3f6003688b55fcbbab33cb96255f517200ce5f637d8377c47600de3286b010a9597b7d
@@ -20,21 +20,22 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
20
20
  config :one_feed, :validate => :array, :default => []
21
21
 
22
22
  #Set the interval for stoppable_sleep
23
- config :interval, :validate => :number, :default => 200
23
+ config :interval, :validate => :number, :default => 3600
24
24
 
25
25
  #Set the blacklist of links to ignore
26
- config :blacklist, :validate => :array, :default => ['http://fusion.google.com/','yahoo.com','live.com','netvibes.com','bloglines.com']
26
+ config :blacklist, :validate => :array, :default => []
27
27
 
28
28
  public
29
29
  def register
30
- @urls = []
31
30
  @agent = Mechanize.new
32
31
  @agent.agent.http.verify_mode = OpenSSL::SSL::VERIFY_NONE
33
32
  end # def register
34
33
 
35
34
 
36
- def run(queue)
35
+ def run(queue)
37
36
  # we can abort the loop if stop? becomes true
37
+ urls = []
38
+
38
39
  while !stop?
39
40
 
40
41
  @multi_feed.each do |rss|
@@ -42,64 +43,82 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
42
43
  begin
43
44
  page = @agent.get(rss)
44
45
  page.links.each do |link|
45
- if link.href.chars.last(3).join == "xml" && not_include_blacklist(link)
46
- @urls << link.href
46
+ if (link.href.chars.last(3).join == "xml" || link.href.include?('/rss') || link.href.include?('/feed')) && not_include_blacklist(link)
47
+ urls << link.href
47
48
  end
48
49
  end
49
50
  rescue
50
51
  puts "Fail to get " + rss + " feed"
51
52
  end
52
53
 
53
- links = @urls.uniq
54
+ links = urls.uniq
54
55
  links.each do |link|
55
56
  begin
56
57
  response_link(link,queue)
57
58
  puts "Read clidren: " + link
58
59
  rescue
59
- puts "Fail to get " + link
60
+ puts "Fail to get " + link + " children"
60
61
  next
61
62
  end
62
63
  end
63
- @urls.clear
64
+ urls.clear
64
65
 
65
- end
66
+ end # multi_feed loop
66
67
 
67
68
  @one_feed.each do |feed|
68
- @urls << feed
69
- end
70
- links_o = @urls.uniq
71
- links_o.each do |link|
69
+ urls << feed
70
+ end # one_feed loop
71
+
72
+ all_links = urls.uniq
73
+ all_links.each do |link|
72
74
  begin
73
75
  response_link(link,queue)
74
76
  puts "Read clidren: " + link
75
77
  rescue
76
78
  puts "Fail to get " + link
77
79
  next
78
- end
79
- end
80
+ end # begin
81
+ end # all_links loop
80
82
 
81
- @urls.clear
83
+ urls.clear
82
84
 
83
85
  Stud.stoppable_sleep(@interval) { stop? }
84
- end # loop
85
- end # def run
86
86
 
87
+ end # loop while
88
+ end # def run
87
89
 
88
- def stop
89
-
90
- end
90
+
91
+ def stop
92
+ end #def stop
91
93
 
92
94
  def response_link(link, queue)
95
+ tried = 2
93
96
  begin
94
- page = Nokogiri::XML(open(link))
97
+ page = Nokogiri::XML(open(link,&:read))
95
98
  page.search('item').each do |item|
96
99
  link_rss_response(queue, item)
97
100
  end
101
+ rescue => ex
102
+ if link.chars.first(1).join == "/" && link.chars.first(2).join != "//"
103
+ link = "http:/" + link
104
+ retry
105
+ elsif link.chars.first(1).join == "/" && link.chars.first(2).join == "//"
106
+ link = "http:" + link
107
+ retry
108
+ end
109
+
110
+ if link.chars.first(4).join == "http" && link.chars.first(5).join != "https"
111
+ link = link.sub('http','https')
112
+ tried = tried - 1
113
+ retry if (tried > 0)
114
+ end
115
+
116
+ @logger.error("Error : ", :exception => ex)
98
117
  rescue => exc
99
118
  puts "ERROR"
100
119
  @logger.error("Uknown error while parsing the feed", :exception => exc)
101
- end
102
- end
120
+ end # begin
121
+ end # def response_link
103
122
 
104
123
  def link_rss_response(queue, item)
105
124
  event = LogStash::Event.new()
@@ -110,11 +129,11 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
110
129
  else
111
130
  eve = LogStash::Event.new( x.name => x.inner_html.to_s )
112
131
  event.append( eve )
113
- end
114
- end
132
+ end # if
133
+ end # loop
115
134
  decorate(event)
116
135
  queue << event
117
- end
136
+ end # def link_rss_response
118
137
 
119
138
  def not_include_blacklist(link)
120
139
  for i in 0..@blacklist.length-1
@@ -123,7 +142,7 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
123
142
  end
124
143
  end
125
144
  return true
126
- end
145
+ end # def not_include_blacklist
127
146
 
128
147
 
129
148
  end # class LogStash::Inputs::Multirss
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'logstash-input-multirss'
3
- s.version = '1.0.0'
3
+ s.version = '1.0.1'
4
4
  s.licenses = ['Apache-2.0']
5
5
  s.summary = 'Simple multi rss plugin'
6
6
  s.description = 'This plugin needs a list of links of different rss. Get all the links of the main feed pages and get all the content of each of the links.'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-input-multirss
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Felix R G
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-08-14 00:00:00.000000000 Z
11
+ date: 2018-08-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement