apollo-crawler 0.0.36 → 0.0.37

Sign up to get free protection for your applications and to get access to all the features.
data/bin/apollo-crawler CHANGED
@@ -18,6 +18,7 @@ require "optparse"
18
18
  require 'active_support'
19
19
  require 'active_support/inflector'
20
20
 
21
+ require 'terminal-table'
21
22
 
22
23
  require File.join(File.dirname(__FILE__), '..', 'lib', 'apollo_crawler', 'version')
23
24
 
@@ -219,16 +220,12 @@ module Crawler
219
220
  end
220
221
 
221
222
  if(@options[:list_plugins])
222
- puts "Listing plugins"
223
- puts "----------------------------------------"
224
- i = 0
225
- @plugins.sort.each do |plugin, klass|
226
- instance = klass.new
227
- # puts klass.class_eval("@@NAME")
228
- puts "(#{i}) #{plugin} - #{instance.name}"
229
- i += 1
230
- end
231
- puts "----------------------------------------"
223
+ headings = ['name', 'class']
224
+ rows = @plugins
225
+
226
+ table = Terminal::Table.new :headings => headings, :rows => rows
227
+
228
+ puts table
232
229
  return
233
230
  end
234
231
 
@@ -250,7 +247,10 @@ module Crawler
250
247
  next
251
248
  end
252
249
 
253
- # puts "Running '#{plugin}'"
250
+ if(@options[:verbose])
251
+ puts "Running '#{plugin}'"
252
+ end
253
+
254
254
  res = p.new.etl
255
255
  if(res.nil?)
256
256
  next
@@ -15,38 +15,31 @@ module Apollo
15
15
  return nil
16
16
  end
17
17
 
18
- def etl()
19
- #return run()
20
- res = []
21
-
22
- if(self.url.nil?)
18
+ def etl(url=nil)
19
+ # Look for passed URL use default instead and fail if it is not valid
20
+ url = url ? url : self.url
21
+ if(url.nil?)
23
22
  return nil
24
23
  end
25
24
 
26
- doc = self.fetch_document(self.url)
25
+ # Try fetch document
26
+ doc = self.fetch_document(url)
27
27
  if(doc.nil?)
28
28
  return nil
29
29
  end
30
30
 
31
- res = self.extract_data(doc)
31
+ # Try extract data from document
32
+ data = self.extract_data(doc)
33
+
34
+ # Try extract links for another documents
35
+ links = self.extract_links(doc)
32
36
 
37
+ # Return ETL result
33
38
  return {
34
39
  :plugin => self.class.name,
35
40
  :title => doc.title,
36
- :data => res
37
- }
38
- end
39
-
40
- # - Fetch default URL (and transform it to document)
41
- # - Extract and Load (Store) important data
42
- # - Look for another documents
43
- # Examples:
44
- # - "next page"
45
- # - "people you may know on Linked in"
46
- # - "will attend on FB")
47
- def run
48
- return {
49
- :plugin => self.class.name
41
+ :data => data,
42
+ :links => links
50
43
  }
51
44
  end
52
45
 
@@ -1,5 +1,5 @@
1
1
  module Apollo
2
2
  module Crawler
3
- VERSION = '0.0.36'
3
+ VERSION = '0.0.37'
4
4
  end # Crawler
5
5
  end # Apollo
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: apollo-crawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.36
4
+ version: 0.0.37
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -123,6 +123,22 @@ dependencies:
123
123
  - - ! '>='
124
124
  - !ruby/object:Gem::Version
125
125
  version: '0'
126
+ - !ruby/object:Gem::Dependency
127
+ name: terminal-table
128
+ requirement: !ruby/object:Gem::Requirement
129
+ none: false
130
+ requirements:
131
+ - - ! '>='
132
+ - !ruby/object:Gem::Version
133
+ version: '0'
134
+ type: :runtime
135
+ prerelease: false
136
+ version_requirements: !ruby/object:Gem::Requirement
137
+ none: false
138
+ requirements:
139
+ - - ! '>='
140
+ - !ruby/object:Gem::Version
141
+ version: '0'
126
142
  - !ruby/object:Gem::Dependency
127
143
  name: thor
128
144
  requirement: !ruby/object:Gem::Requirement