logstash-input-multirss 1.0.0 → 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cad07458c379cfcd9f3a7cbb9608f693a33efe5dfee0a2087387df491c135218
4
- data.tar.gz: a5956c590a9e0d9667b278de7f690d3a55ce9eaf886919f2b355c9e12fff8675
3
+ metadata.gz: d3e3215896069197c28eeaf2bc1d97b8a7b922b5f629d33f15c82b6cbe84c20a
4
+ data.tar.gz: d1391c39b3f2bf48888f93ca26633c821da10f439772408ac61de4ba9a561681
5
5
  SHA512:
6
- metadata.gz: 6611bda686c887dbaebba928718cd6521e4c2076e0c44feabdd1e13e89aef3aeb6a80cf8861dfba40b8b9a442f381262bc72023e6d3b3a14c86b94aadf3611ea
7
- data.tar.gz: d73bfa18ec57247ff7b566f94e2d6e6a9e710bd09b5b5563034bb9cc74c46ddc2bc5c9f8e13f268ceb259913195922388f3efe2f0c952689e88cee932cb6a453
6
+ metadata.gz: acbfa422e853ab42d439c960064f45df5277e372da471b20aa60f6a78d508fd872d9674fb009413fd0aee88b774d88eebe4e68db355c1ae6b7fee21f1d17d0c8
7
+ data.tar.gz: 0f1cf2179aa9722c65d610b6ea7c266866ad1719984cb89b280607c5fe3f6003688b55fcbbab33cb96255f517200ce5f637d8377c47600de3286b010a9597b7d
@@ -20,21 +20,22 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
20
20
  config :one_feed, :validate => :array, :default => []
21
21
 
22
22
  #Set the interval for stoppable_sleep
23
- config :interval, :validate => :number, :default => 200
23
+ config :interval, :validate => :number, :default => 3600
24
24
 
25
25
  #Set the blacklist of links to ignore
26
- config :blacklist, :validate => :array, :default => ['http://fusion.google.com/','yahoo.com','live.com','netvibes.com','bloglines.com']
26
+ config :blacklist, :validate => :array, :default => []
27
27
 
28
28
  public
29
29
  def register
30
- @urls = []
31
30
  @agent = Mechanize.new
32
31
  @agent.agent.http.verify_mode = OpenSSL::SSL::VERIFY_NONE
33
32
  end # def register
34
33
 
35
34
 
36
- def run(queue)
35
+ def run(queue)
37
36
  # we can abort the loop if stop? becomes true
37
+ urls = []
38
+
38
39
  while !stop?
39
40
 
40
41
  @multi_feed.each do |rss|
@@ -42,64 +43,82 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
42
43
  begin
43
44
  page = @agent.get(rss)
44
45
  page.links.each do |link|
45
- if link.href.chars.last(3).join == "xml" && not_include_blacklist(link)
46
- @urls << link.href
46
+ if (link.href.chars.last(3).join == "xml" || link.href.include?('/rss') || link.href.include?('/feed')) && not_include_blacklist(link)
47
+ urls << link.href
47
48
  end
48
49
  end
49
50
  rescue
50
51
  puts "Fail to get " + rss + " feed"
51
52
  end
52
53
 
53
- links = @urls.uniq
54
+ links = urls.uniq
54
55
  links.each do |link|
55
56
  begin
56
57
  response_link(link,queue)
57
58
  puts "Read clidren: " + link
58
59
  rescue
59
- puts "Fail to get " + link
60
+ puts "Fail to get " + link + " children"
60
61
  next
61
62
  end
62
63
  end
63
- @urls.clear
64
+ urls.clear
64
65
 
65
- end
66
+ end # multi_feed loop
66
67
 
67
68
  @one_feed.each do |feed|
68
- @urls << feed
69
- end
70
- links_o = @urls.uniq
71
- links_o.each do |link|
69
+ urls << feed
70
+ end # one_feed loop
71
+
72
+ all_links = urls.uniq
73
+ all_links.each do |link|
72
74
  begin
73
75
  response_link(link,queue)
74
76
  puts "Read clidren: " + link
75
77
  rescue
76
78
  puts "Fail to get " + link
77
79
  next
78
- end
79
- end
80
+ end # begin
81
+ end # all_links loop
80
82
 
81
- @urls.clear
83
+ urls.clear
82
84
 
83
85
  Stud.stoppable_sleep(@interval) { stop? }
84
- end # loop
85
- end # def run
86
86
 
87
+ end # loop while
88
+ end # def run
87
89
 
88
- def stop
89
-
90
- end
90
+
91
+ def stop
92
+ end #def stop
91
93
 
92
94
  def response_link(link, queue)
95
+ tried = 2
93
96
  begin
94
- page = Nokogiri::XML(open(link))
97
+ page = Nokogiri::XML(open(link,&:read))
95
98
  page.search('item').each do |item|
96
99
  link_rss_response(queue, item)
97
100
  end
101
+ rescue => ex
102
+ if link.chars.first(1).join == "/" && link.chars.first(2).join != "//"
103
+ link = "http:/" + link
104
+ retry
105
+ elsif link.chars.first(1).join == "/" && link.chars.first(2).join == "//"
106
+ link = "http:" + link
107
+ retry
108
+ end
109
+
110
+ if link.chars.first(4).join == "http" && link.chars.first(5).join != "https"
111
+ link = link.sub('http','https')
112
+ tried = tried - 1
113
+ retry if (tried > 0)
114
+ end
115
+
116
+ @logger.error("Error : ", :exception => ex)
98
117
  rescue => exc
99
118
  puts "ERROR"
100
119
  @logger.error("Uknown error while parsing the feed", :exception => exc)
101
- end
102
- end
120
+ end # begin
121
+ end # def response_link
103
122
 
104
123
  def link_rss_response(queue, item)
105
124
  event = LogStash::Event.new()
@@ -110,11 +129,11 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
110
129
  else
111
130
  eve = LogStash::Event.new( x.name => x.inner_html.to_s )
112
131
  event.append( eve )
113
- end
114
- end
132
+ end # if
133
+ end # loop
115
134
  decorate(event)
116
135
  queue << event
117
- end
136
+ end # def link_rss_response
118
137
 
119
138
  def not_include_blacklist(link)
120
139
  for i in 0..@blacklist.length-1
@@ -123,7 +142,7 @@ class LogStash::Inputs::Multirss < LogStash::Inputs::Base
123
142
  end
124
143
  end
125
144
  return true
126
- end
145
+ end # def not_include_blacklist
127
146
 
128
147
 
129
148
  end # class LogStash::Inputs::Multirss
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'logstash-input-multirss'
3
- s.version = '1.0.0'
3
+ s.version = '1.0.1'
4
4
  s.licenses = ['Apache-2.0']
5
5
  s.summary = 'Simple multi rss plugin'
6
6
  s.description = 'This plugin needs a list of links of different rss. Get all the links of the main feed pages and get all the content of each of the links.'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-input-multirss
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Felix R G
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-08-14 00:00:00.000000000 Z
11
+ date: 2018-08-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement