apollo-crawler 0.0.44 → 0.0.45

This diff shows the changes between two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
data/bin/apollo-crawler CHANGED
@@ -24,22 +24,22 @@ require File.join(File.dirname(__FILE__), '..', 'lib', 'apollo_crawler', 'versio
 
  module Crawler
  class Program
- @@PLUGIN_DIR = File.join(File.dirname(__FILE__), "..", "lib", "apollo_crawler", "plugins")
+ @@CRAWLERS_DIR = File.join(File.dirname(__FILE__), "..", "lib", "apollo_crawler", "crawlers")
  @@FORMATTERS_DIR = File.join(File.dirname(__FILE__), "..", "lib", "apollo_crawler", "formatters")
- @@PLUGIN_TEMPLATE_NAME = "plugin_template.rb"
+ @@CRAWLER_TEMPLATE_NAME = "crawler_template.rb"
 
  # This hash will hold all of the options
  # parsed from the command-line by
  # OptionParser.
  @options = nil
  @optparser = nil
- @plugins = nil
+ @crawlers = nil
  @formatters = nil
  @formatter = nil
 
  # Initializer - Constructor
  def initialize
- @plugins = {}
+ @crawlers = {}
  @formatters = {}
  end
 
@@ -48,13 +48,13 @@ module Crawler
  @options = {}
  @options[:verbose] = false
  @options[:version] = false
- @options[:plugin_dirs] = [
- @@PLUGIN_DIR
+ @options[:crawler_dirs] = [
+ @@CRAWLERS_DIR
  ]
  @options[:formatter_dirs] = [
  @@FORMATTERS_DIR
  ]
- @options[:generate_plugin] = nil
+ @options[:generate_crawler] = nil
 
  @optparser = OptionParser.new do | opts |
  # This displays the help screen, all programs are
@@ -64,7 +64,7 @@ module Crawler
  exit
  end
 
- opts.on('-a', '--all', 'Run all plugins') do
+ opts.on('-a', '--all', 'Run all crawlers') do
  @options[:run_all] = true
  end
 
@@ -72,12 +72,12 @@ module Crawler
  @options[:formatter] = name
  end
 
- opts.on('-g', '--generate [NAME]', "Generate scaffold for new plugin") do |name|
- @options[:generate_plugin] = name
+ opts.on('-g', '--generate [NAME]', "Generate scaffold for new crawler") do |name|
+ @options[:generate_crawler] = name
  end
 
- opts.on('-i', '--include [PATH]', 'Include additional plugins or plugin directories') do |path|
- @options[:plugin_dirs] << path
+ opts.on('-i', '--include [PATH]', 'Include additional crawler or crawler directory') do |path|
+ @options[:crawler_dirs] << path
  end
 
  opts.on('-v', '--verbose', 'Enable verbose output') do
@@ -88,8 +88,8 @@ module Crawler
  @options[:version] = true
  end
 
- opts.on('-l', '--list-plugins', 'List of plugins') do
- @options[:list_plugins] = true
+ opts.on('-l', '--list-crawlers', 'List of crawlers') do
+ @options[:list_crawlers] = true
  end
 
  opts.on(nil, '--list-formatters', 'List of formatters available') do
@@ -163,10 +163,10 @@ module Crawler
  end
  end
 
- # Register plugins (specific crawlers)
- def register_plugins(dir)
+ # Register crawlers
+ def register_crawlers(dir)
  if(@options[:verbose])
- puts "Registering plugins - '#{dir}'"
+ puts "Registering crawlers - '#{dir}'"
  end
 
  files = File.join(dir, "**", "*.rb")
@@ -174,36 +174,36 @@ module Crawler
  require file
  end
 
- tmp = Apollo::Crawler::Plugins.constants.select { |c|
- Class === Apollo::Crawler::Plugins.const_get(c)
+ tmp = Apollo::Crawler::Crawlers.constants.select { |c|
+ Class === Apollo::Crawler::Crawlers.const_get(c)
  }
 
  tmp.each do |x|
- klass = Object.const_get('Apollo').const_get('Crawler').const_get('Plugins').const_get(x)
- @plugins.merge!({ x.downcase.to_s => klass})
+ klass = Object.const_get('Apollo').const_get('Crawler').const_get('Crawlers').const_get(x)
+ @crawlers.merge!({ x.downcase.to_s => klass})
  end
 
  if(@options[:verbose])
- @plugins.each do |plugin, klass|
+ @crawlers.each do |crawler, klass|
  name = klass.new.class.name
 
- if name == "Apollo::Crawler::Plugins::Plugin"
+ if name == "Apollo::Crawler::Crawlers::Crawler"
  next
  end
 
- puts "Registered plugin '#{plugin}' -> '#{name}'"
+ puts "Registered crawler '#{crawler}' -> '#{name}'"
  end
  end
  end
 
- def generate_plugin(name, url = nil, matcher = nil)
+ def generate_crawler(name, url = nil, matcher = nil)
  name = name.titleize.gsub(" ", "")
 
  if(@options[:verbose])
- puts "Generating new plugin '#{name}'"
+ puts "Generating new crawler '#{name}'"
  end
 
- template_path = File.join(File.dirname(__FILE__), '..', 'lib', 'apollo_crawler', @@PLUGIN_TEMPLATE_NAME)
+ template_path = File.join(File.dirname(__FILE__), '..', 'lib', 'apollo_crawler', @@CRAWLER_TEMPLATE_NAME)
  if(File.exists?(template_path) == false)
  puts "Template file '#{template_path}' does not exists!"
  return
@@ -219,23 +219,23 @@ module Crawler
  matcher = matcher ? matcher : "//a"
 
  placeholders = {
- "PLUGIN_CLASS_NAME" => name,
- "PLUGIN_NAME" => name.titleize,
- "PLUGIN_URL" => url,
- "PLUGIN_MATCHER" => matcher
+ "CRAWLER_CLASS_NAME" => name,
+ "CRAWLER_NAME" => name.titleize,
+ "CRAWLER_URL" => url,
+ "CRAWLER_MATCHER" => matcher
  }
 
- puts "Generating plugin '#{name.titleize}', class: '#{name}', path: '#{dest_path}'"
+ puts "Generating crawler '#{name.titleize}', class: '#{name}', path: '#{dest_path}'"
 
  File.open(template_path, 'r') do |tmpl|
- File.open(dest_path, 'w') do |plugin|
+ File.open(dest_path, 'w') do |crawler|
  while line = tmpl.gets
  #puts line
  placeholders.each do |k, v|
  line.gsub!(k, v)
  end
 
- plugin.puts line
+ crawler.puts line
  end
  end
  end
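For orientation, here is a minimal sketch of the substitution that the `--generate` scaffold performs, using the placeholder names from the diff. The crawler name and the input/output paths below are illustrative assumptions, not values produced by the gem itself.

# Illustrative only: mirrors the placeholder substitution in generate_crawler.
# "ExampleSite" and both file paths are hypothetical.
placeholders = {
  "CRAWLER_CLASS_NAME" => "ExampleSite",
  "CRAWLER_NAME"       => "Example Site",
  "CRAWLER_URL"        => "http://example.com",
  "CRAWLER_MATCHER"    => "//a"
}

template = File.read("lib/apollo_crawler/crawler_template.rb")
placeholders.each { |key, value| template.gsub!(key, value) }
File.write("lib/apollo_crawler/crawlers/example_site/example_site.rb", template)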
@@ -253,18 +253,18 @@ module Crawler
 
  load_config_file()
 
- if(@options[:generate_plugin])
- name = @options[:generate_plugin]
+ if(@options[:generate_crawler])
+ name = @options[:generate_crawler]
  url = ARGV.length > 0 ? ARGV[0] : nil
  matcher = ARGV.length > 1 ? ARGV[1] : nil
 
- self.generate_plugin(name, url, matcher)
+ self.generate_crawler(name, url, matcher)
  exit
  end
 
  # Register sites which can be crawled
- @options[:plugin_dirs].each do |dir|
- register_plugins(dir)
+ @options[:crawler_dirs].each do |dir|
+ register_crawlers(dir)
  end
 
  # Register sites which can be crawled
@@ -298,9 +298,9 @@ module Crawler
  return
  end
 
- if(@options[:list_plugins])
+ if(@options[:list_crawlers])
  headings = ['name', 'class']
- rows = @plugins
+ rows = @crawlers
 
  table = Terminal::Table.new :headings => headings, :rows => rows
 
@@ -308,27 +308,27 @@ module Crawler
  return
  end
 
- plugins = ARGV
+ crawlers = ARGV
 
  if(@options[:run_all])
- plugins = @plugins.keys
+ crawlers = @crawlers.keys
  end
 
- if(plugins.empty?)
+ if(crawlers.empty?)
  puts @optparser
  exit
  end
 
- plugins.each do |plugin|
- p = @plugins[plugin.downcase]
+ crawlers.each do |crawler|
+ p = @crawlers[crawler.downcase]
  if(p == nil)
- puts "Invalid plugin name - '#{plugin}'"
+ puts "Invalid crawler name - '#{crawler}'"
  puts "See program help"
  next
  end
 
  if(@options[:verbose])
- puts "Running '#{plugin}'"
+ puts "Running '#{crawler}'"
  end
 
  res = p.new.etl
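Outside the CLI, the renamed registry simply maps downcased class names to crawler classes and runs each one through `etl`. A rough sketch of that lookup, assuming the gem from this release is installed and loaded (the discovery and lookup names come from the diff; error handling is omitted):

# Rough sketch of the run loop above, using the Slashdot crawler shipped in 0.0.45.
require 'apollo_crawler'

crawlers = {}
Apollo::Crawler::Crawlers.constants.select { |c| Class === Apollo::Crawler::Crawlers.const_get(c) }.each do |c|
  crawlers[c.to_s.downcase] = Apollo::Crawler::Crawlers.const_get(c)
end

klass  = crawlers["slashdot"]   # lookup key is the downcased class name
result = klass.new.etl          # => { :crawler => ..., :title => ..., :data => ..., :links => ... }
puts result[:title] unless result.nil?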
@@ -1,16 +1,13 @@
- # require 'apollo_crawler/plugin'
-
  require 'apollo_crawler/crawler'
  require 'apollo_crawler/formatter'
- require 'apollo_crawler/plugin'
+
+ # Crawlers
+ require 'apollo_crawler/crawlers/alexa_com/alexa'
+ require 'apollo_crawler/crawlers/firmy_cz/firmy'
+ require 'apollo_crawler/crawlers/slashdot_org/slashdot'
+ require 'apollo_crawler/crawlers/ycombinator_com/hacker_news'
 
  # Formatters
  require 'apollo_crawler/formatters/formatter_json'
  require 'apollo_crawler/formatters/formatter_plain'
  require 'apollo_crawler/formatters/formatter_table'
-
- # Plugins
- require 'apollo_crawler/plugins/alexa_com/alexa'
- require 'apollo_crawler/plugins/firmy_cz/firmy'
- require 'apollo_crawler/plugins/slashdot_org/slashdot'
- require 'apollo_crawler/plugins/ycombinator_com/hacker_news'
@@ -0,0 +1,77 @@
+ require "open-uri"
+ require "nokogiri"
+
+ module Apollo
+ module Crawler
+ module Crawlers
+ class Crawler
+
+ # Name of the crawler
+ def name
+ return "Crawler Base"
+ end
+
+ def url
+ return nil
+ end
+
+ # - (0) Figure out URL
+ # - (1) Extract Data
+ # - (2) Extract Links
+ # - (3) Go to (0) eventually
+ def etl(url=nil)
+ # Look for passed URL use default instead and fail if it is not valid
+ url = url ? url : self.url
+ if(url.nil?)
+ return nil
+ end
+
+ # Try fetch document
+ doc = self.fetch_document(url)
+ if(doc.nil?)
+ return nil
+ end
+
+ # Try extract data from document
+ data = self.extract_data(doc)
+
+ # Try extract links for another documents
+ links = self.extract_links(doc)
+
+ # Return ETL result
+ return {
+ :crawler => self.class.name,
+ :title => doc.title,
+ :data => data,
+ :links => links
+ }
+ end
+
+ # Fetch document
+ def fetch_document(url)
+ ic = Iconv.new("UTF-8//IGNORE", "UTF-8")
+
+ if(self.url.nil?)
+ return nil
+ end
+
+ # TODO: Encapsulate and make more robust => invalid hostname, timeouts and so
+ doc = Nokogiri::HTML(ic.iconv(open(self.url).read))
+ return doc
+ end
+
+ # Extracts data from document
+ def extract_data(doc)
+ res = []
+ return res
+ end
+
+ # Extract links to another documents from this document
+ def extract_links(doc)
+ res = []
+ return res
+ end
+ end
+ end
+ end
+ end
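To illustrate the new base class API, here is a minimal hypothetical subclass in the new `Apollo::Crawler::Crawlers` namespace, following the pattern of the shipped crawlers (Alexa, Slashdot, etc.). The class name, URL, and XPath are illustrative assumptions, not part of the gem.

# Hypothetical example crawler built on the new base class; not shipped with the gem.
require 'apollo_crawler'

module Apollo
  module Crawler
    module Crawlers
      class ExampleNews < Apollo::Crawler::Crawlers::Crawler
        def name
          return "Example News"
        end

        def url
          return "http://example.com/news"
        end

        # Collect link text and hrefs matched by a simple XPath
        def extract_data(doc)
          doc.xpath("//a").map do |node|
            { :text => node.text, :link => node['href'] }
          end
        end
      end
    end
  end
end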
@@ -2,17 +2,16 @@ require 'iconv'
 
  module Apollo
  module Crawler
- module Plugins
- # PARAMATRIZE: Plugin class name
- class PLUGIN_CLASS_NAME < Plugin
- @@MATCHER_ITEM = "PLUGIN_MATCHER"
+ module Crawlers
+ class CRAWLER_CLASS_NAME < Apollo::Crawler::Crawlers::Crawler
+ @@MATCHER_ITEM = "CRAWLER_MATCHER"
 
  def name()
- return "PLUGIN_NAME"
+ return "CRAWLER_NAME"
  end
 
  def url()
- return "PLUGIN_URL"
+ return "CRAWLER_URL"
  end
 
  def extract_data(doc)
@@ -24,6 +23,6 @@ module Apollo
  }
  end
  end
- end # Plugins
+ end # Crawlers
  end # Crawler
  end # Apollo
@@ -1,11 +1,11 @@
  require 'iconv'
 
- require File.join(File.dirname(__FILE__), '..', '..', 'plugin')
+ require File.join(File.dirname(__FILE__), '..', '..', 'crawler')
 
  module Apollo
  module Crawler
- module Plugins
- class Alexa < Plugin
+ module Crawlers
+ class Alexa < Apollo::Crawler::Crawlers::Crawler
  @@MATCHER_ITEM = "//div[@id = 'alphabetically']/ul/li/a"
 
  def name()
@@ -25,6 +25,6 @@ module Apollo
  }
  end
  end
- end # Plugins
+ end # Crawlers
  end # Crawler
  end # Apollo
@@ -1,11 +1,11 @@
  require 'iconv'
 
- require File.join(File.dirname(__FILE__), '..', '..', 'plugin')
+ require File.join(File.dirname(__FILE__), '..', '..', 'crawler')
 
  module Apollo
  module Crawler
- module Plugins
- class Firmy < Plugin
+ module Crawlers
+ class Firmy < Apollo::Crawler::Crawlers::Crawler
  @@MATCHER_ITEM = "//div[@id = 'alphabetically']/ul/li/a"
 
  def name()
@@ -25,6 +25,6 @@ module Apollo
  }
  end
  end
- end # Plugins
+ end # Crawlers
  end # Crawler
  end # Apollo
@@ -1,11 +1,11 @@
  require 'iconv'
 
- require File.join(File.dirname(__FILE__), '..', '..', 'plugin')
+ require File.join(File.dirname(__FILE__), '..', '..', 'crawler')
 
  module Apollo
  module Crawler
- module Plugins
- class Slashdot < Plugin
+ module Crawlers
+ class Slashdot < Apollo::Crawler::Crawlers::Crawler
  @@MATCHER_ITEM = "//article/header/h2/span/a"
 
  def name
@@ -25,6 +25,6 @@ module Apollo
  }
  end
  end
- end # Plugins
+ end # Crawlers
  end # Crawler
  end # Apollo
@@ -1,11 +1,11 @@
  require 'iconv'
 
- require File.join(File.dirname(__FILE__), '..', '..', 'plugin')
+ require File.join(File.dirname(__FILE__), '..', '..', 'crawler')
 
  module Apollo
- module Crawler
- module Plugins
- class StackOverflow < Plugin
+ module Crawlers
+ module Crawler
+ class StackOverflow < Apollo::Crawler::Crawlers::Crawler
  @@MATCHER_ITEM = "//div[@class = 'summary']/h3/a"
 
  def name
@@ -25,6 +25,6 @@ module Apollo
  }
  end
  end
- end # Plugins
+ end # Crawlers
  end # Crawler
  end # Apollo
@@ -1,11 +1,11 @@
  require 'iconv'
 
- require File.join(File.dirname(__FILE__), '..', '..', 'plugin')
+ require File.join(File.dirname(__FILE__), '..', '..', 'crawler')
 
  module Apollo
  module Crawler
- module Plugins
- class Xkcd < Plugin
+ module Crawlers
+ class Xkcd < Apollo::Crawler::Crawlers::Crawler
  @@MATCHER_ITEM = "//div[@id = 'comic']/img"
 
  def name()
@@ -25,6 +25,6 @@ module Apollo
  }
  end
  end
- end # Plugins
+ end # Crawlers
  end # Crawler
  end # Apollo
@@ -1,11 +1,11 @@
  require 'iconv'
 
- require File.join(File.dirname(__FILE__), '..', '..', 'plugin')
+ require File.join(File.dirname(__FILE__), '..', '..', 'crawler')
 
  module Apollo
  module Crawler
- module Plugins
- class HackerNews < Plugin
+ module Crawlers
+ class HackerNews < Apollo::Crawler::Crawlers::Crawler
  @@MATCHER_ITEM = "//td[@class = 'title']/a"
 
  def name
@@ -25,6 +25,6 @@ module Apollo
  }
  end
  end
- end # Plugins
+ end # Crawlers
  end # Crawler
  end # Apollo
@@ -1,5 +1,5 @@
  module Apollo
  module Crawler
- VERSION = '0.0.44'
+ VERSION = '0.0.45'
  end # Crawler
  end # Apollo
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: apollo-crawler
  version: !ruby/object:Gem::Version
- version: 0.0.44
+ version: 0.0.45
  prerelease:
  platform: ruby
  authors:
@@ -9,7 +9,7 @@ authors:
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2013-02-23 00:00:00.000000000 Z
+ date: 2013-02-24 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: amqp
@@ -91,6 +91,22 @@ dependencies:
  - - ! '>='
  - !ruby/object:Gem::Version
  version: '0'
+ - !ruby/object:Gem::Dependency
+ name: writeexcel
+ requirement: !ruby/object:Gem::Requirement
+ none: false
+ requirements:
+ - - ! '>='
+ - !ruby/object:Gem::Version
+ version: '0'
+ type: :runtime
+ prerelease: false
+ version_requirements: !ruby/object:Gem::Requirement
+ none: false
+ requirements:
+ - - ! '>='
+ - !ruby/object:Gem::Version
+ version: '0'
  - !ruby/object:Gem::Dependency
  name: iconv
  requirement: !ruby/object:Gem::Requirement
@@ -219,7 +235,7 @@ dependencies:
  - - ! '>='
  - !ruby/object:Gem::Version
  version: '0'
- description: Gem for crawling data from external resources
+ description: Gem for crawling data from external sources
  email: korczis@gmail.com
  executables:
  - apollo-crawler
@@ -230,16 +246,15 @@ files:
  - ./lib/apollo_crawler/formatters/formatter_json.rb
  - ./lib/apollo_crawler/formatters/formatter_table.rb
  - ./lib/apollo_crawler/version.rb
+ - ./lib/apollo_crawler/crawler_template.rb
  - ./lib/apollo_crawler/crawler.rb
+ - ./lib/apollo_crawler/crawlers/stackoverflow_com/stackoverflow.rb
+ - ./lib/apollo_crawler/crawlers/xkcd_com/xkcd.rb
+ - ./lib/apollo_crawler/crawlers/slashdot_org/slashdot.rb
+ - ./lib/apollo_crawler/crawlers/firmy_cz/firmy.rb
+ - ./lib/apollo_crawler/crawlers/alexa_com/alexa.rb
+ - ./lib/apollo_crawler/crawlers/ycombinator_com/hacker_news.rb
  - ./lib/apollo_crawler/formatter.rb
- - ./lib/apollo_crawler/plugin_template.rb
- - ./lib/apollo_crawler/plugins/stackoverflow_com/stackoverflow.rb
- - ./lib/apollo_crawler/plugins/xkcd_com/xkcd.rb
- - ./lib/apollo_crawler/plugins/slashdot_org/slashdot.rb
- - ./lib/apollo_crawler/plugins/firmy_cz/firmy.rb
- - ./lib/apollo_crawler/plugins/alexa_com/alexa.rb
- - ./lib/apollo_crawler/plugins/ycombinator_com/hacker_news.rb
- - ./lib/apollo_crawler/plugin.rb
  - ./lib/apollo_crawler.rb
  - bin/apollo-crawler
  homepage: https://github.com/korczis/apollo-crawler
@@ -1,73 +0,0 @@
- require "open-uri"
- require "nokogiri"
-
- module Apollo
- module Crawler
- module Plugins
- class Plugin
-
- # Name of the plugin, used in docs, lookups, etc ...
- def name
- return "Plugin Base"
- end
-
- def url
- return nil
- end
-
- def etl(url=nil)
- # Look for passed URL use default instead and fail if it is not valid
- url = url ? url : self.url
- if(url.nil?)
- return nil
- end
-
- # Try fetch document
- doc = self.fetch_document(url)
- if(doc.nil?)
- return nil
- end
-
- # Try extract data from document
- data = self.extract_data(doc)
-
- # Try extract links for another documents
- links = self.extract_links(doc)
-
- # Return ETL result
- return {
- :plugin => self.class.name,
- :title => doc.title,
- :data => data,
- :links => links
- }
- end
-
- # Fetch document
- def fetch_document(url)
- ic = Iconv.new("UTF-8//IGNORE", "UTF-8")
-
- if(self.url.nil?)
- return nil
- end
-
- # TODO: Encapsulate and make more robust => invalid hostname, timeouts and so
- doc = Nokogiri::HTML(ic.iconv(open(self.url).read))
- return doc
- end
-
- # Extracts data from document
- def extract_data(doc)
- res = []
- return res
- end
-
- # Extract links to another documents from this document
- def extract_links(doc)
- res = []
- return res
- end
- end
- end
- end
- end