scruber 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/.rspec +2 -0
- data/.travis.yml +5 -0
- data/Gemfile +6 -0
- data/LICENSE.txt +21 -0
- data/README.md +39 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/exe/scruber +4 -0
- data/lib/scruber/app_searcher.rb +31 -0
- data/lib/scruber/cli/project_generator.rb +47 -0
- data/lib/scruber/cli/templates/Gemfile.tt +6 -0
- data/lib/scruber/cli/templates/application.tt +18 -0
- data/lib/scruber/cli/templates/bin/scruber.tt +6 -0
- data/lib/scruber/cli/templates/boot.tt +3 -0
- data/lib/scruber/cli/templates/gitignore.tt +12 -0
- data/lib/scruber/cli/templates/initializers/proxies.tt +10 -0
- data/lib/scruber/cli/templates/initializers/user_agents.tt +14 -0
- data/lib/scruber/cli/templates/scrapers/sample.tt +7 -0
- data/lib/scruber/cli.rb +40 -0
- data/lib/scruber/core/configuration.rb +30 -0
- data/lib/scruber/core/crawler.rb +92 -0
- data/lib/scruber/core/extensions/base.rb +26 -0
- data/lib/scruber/core/extensions/csv_output.rb +62 -0
- data/lib/scruber/core/extensions/loop.rb +39 -0
- data/lib/scruber/core/page_format/base.rb +11 -0
- data/lib/scruber/core/page_format/html.rb +13 -0
- data/lib/scruber/core/page_format/xml.rb +13 -0
- data/lib/scruber/core/page_format.rb +33 -0
- data/lib/scruber/fetcher.rb +34 -0
- data/lib/scruber/fetcher_adapters/abstract_adapter.rb +119 -0
- data/lib/scruber/fetcher_adapters/typhoeus_fetcher.rb +78 -0
- data/lib/scruber/helpers/dictionary_reader/csv.rb +27 -0
- data/lib/scruber/helpers/dictionary_reader/xml.rb +23 -0
- data/lib/scruber/helpers/dictionary_reader.rb +33 -0
- data/lib/scruber/helpers/fetcher_agent.rb +40 -0
- data/lib/scruber/helpers/fetcher_agent_adapters/abstract_adapter.rb +69 -0
- data/lib/scruber/helpers/fetcher_agent_adapters/memory.rb +41 -0
- data/lib/scruber/helpers/proxy_rotator.rb +125 -0
- data/lib/scruber/helpers/user_agent_rotator.rb +91 -0
- data/lib/scruber/queue.rb +34 -0
- data/lib/scruber/queue_adapters/abstract_adapter.rb +112 -0
- data/lib/scruber/queue_adapters/memory.rb +70 -0
- data/lib/scruber/version.rb +3 -0
- data/lib/scruber.rb +69 -0
- data/scruber.gemspec +43 -0
- metadata +233 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 6aecb299dd5c70bfe864f65d2faed704cdc3cde0
|
4
|
+
data.tar.gz: f0697ca03b0f1552e03fb1ee70d0265d5b8bed04
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 2f52170ba8af4152b88e694537fa9e55156fa6d082c1e9ec409ae46808dc9ca28ff47ed241e8072c0edbba13b3e0be9f00504aef4542da52c3a280c282d8df71
|
7
|
+
data.tar.gz: 580d4c3840526dba3e62826433f8dddd09cdc42f569a2db05beb69b3428bfbd0fea92dc2faa0367175870327605f27f960e04be3d65d5c2dbd3c44c61719252d
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2017 Ivan Goncharov
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
# Scruber
|
2
|
+
|
3
|
+
Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/scruber`. To experiment with that code, run `bin/console` for an interactive prompt.
|
4
|
+
|
5
|
+
TODO: Delete this and the text above, and describe your gem
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
Add this line to your application's Gemfile:
|
10
|
+
|
11
|
+
```ruby
|
12
|
+
gem 'scruber'
|
13
|
+
```
|
14
|
+
|
15
|
+
And then execute:
|
16
|
+
|
17
|
+
$ bundle
|
18
|
+
|
19
|
+
Or install it yourself as:
|
20
|
+
|
21
|
+
$ gem install scruber
|
22
|
+
|
23
|
+
## Usage
|
24
|
+
|
25
|
+
TODO: Write usage instructions here
|
26
|
+
|
27
|
+
## Development
|
28
|
+
|
29
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
30
|
+
|
31
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
32
|
+
|
33
|
+
## Contributing
|
34
|
+
|
35
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/[USERNAME]/scruber.
|
36
|
+
|
37
|
+
## License
|
38
|
+
|
39
|
+
The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
|
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "scruber"
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
# require "pry"
|
11
|
+
# Pry.start
|
12
|
+
|
13
|
+
require "irb"
|
14
|
+
IRB.start(__FILE__)
|
data/bin/setup
ADDED
data/exe/scruber
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
require "pathname" # Pathname is used below; do not rely on another file loading it

module Scruber
  # Locates a Scruber project's executable by walking up the directory tree
  # from the current working directory and, when one is found, replaces the
  # current process with it.
  module AppSearcher

    extend self

    RUBY = Gem.ruby
    EXECUTABLES = ["bin/scruber"]

    # Search the current directory and each of its ancestors for one of
    # EXECUTABLES. On the first hit the process is replaced (via Kernel#exec)
    # by RUBY running that executable with the current ARGV. When the
    # filesystem root is reached without a hit, the original working
    # directory is restored and the method returns.
    #
    # @param name [String] accepted for the caller's convenience but not used
    #   here; the arguments forwarded to the executable come from ARGV
    # @return [nil] only when no executable was found
    def exec_app(name)
      original_cwd = Dir.pwd

      loop do
        if exe = find_executable
          exec RUBY, exe, *ARGV
          break # unreachable in production; lets the test suite stub exec
        end

        # If we exhaust the search there is no executable, this could be a
        # call to generate a new application, so restore the original cwd.
        if Pathname.new(Dir.pwd).root?
          Dir.chdir(original_cwd)
          return
        end

        # Otherwise keep moving upwards in search of an executable.
        Dir.chdir("..")
      end
    end

    # @return [String, nil] the first entry of EXECUTABLES that exists as a
    #   regular file relative to the current working directory, or nil
    def find_executable
      EXECUTABLES.find { |exe| File.file?(exe) }
    end
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require "thor"
|
2
|
+
require 'thor/group'
|
3
|
+
require 'fileutils'
|
4
|
+
|
5
|
+
module Scruber
  module CLI
    # Thor generator behind `scruber new PATH`: creates the project directory,
    # renders the bundled templates into it, runs `bundle` and prints a hint.
    # Thor::Group invokes the public instance methods below in the order in
    # which they are defined.
    class ProjectGenerator < Thor::Group
      include Thor::Actions

      argument :path
      class_option :queue, :default => 'memory', :aliases => '-q'
      class_option :fetcher_agent, :default => 'memory', :aliases => '-fa'

      # @return [String] directory that holds the *.tt templates
      def self.source_root
        File.dirname(__FILE__) + '/templates'
      end

      # Create the project root.
      #
      # @raise [Thor::Error] when +path+ already exists
      def create_directories
        raise ::Thor::Error, "ERROR: #{path} already exists." if File.exist?(path)
        say "Creating scruber project at #{path}"
        FileUtils.mkdir_p(path)
      end

      # Render every project file from its template. Each template is rendered
      # exactly once (boot.tt used to be listed twice).
      def create_files
        template 'Gemfile.tt', path+'/Gemfile'
        template 'gitignore.tt', path+'/.gitignore'
        template 'bin/scruber.tt', path+'/bin/scruber'
        template 'application.tt', path+'/config/application.rb'
        template 'boot.tt', path+'/config/boot.rb'
        template 'initializers/proxies.tt', path+'/config/initializers/proxies.rb'
        template 'initializers/user_agents.tt', path+'/config/initializers/user_agents.rb'
        template 'scrapers/sample.tt', path+'/scrapers/sample.rb'
      end

      # Install the generated project's dependencies from inside its directory.
      def init_project
        inside path do
          run "bundle"
        end
      end

      # Tell the user how to run the generated sample scraper.
      def print_instructions
        say "Run `scruber start sample` to run sample scraper"
      end
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require File.expand_path('../boot', __FILE__)
|
2
|
+
|
3
|
+
Bundler.require(:default)
|
4
|
+
|
5
|
+
Scruber.configure do |config|
|
6
|
+
config.fetcher_adapter = :typhoeus_fetcher
|
7
|
+
config.fetcher_options = {
|
8
|
+
max_concurrency: 1,
|
9
|
+
max_retry_times: 5,
|
10
|
+
retry_delays: [1,2,2,4,4],
|
11
|
+
followlocation: false,
|
12
|
+
request_timeout: 15,
|
13
|
+
}
|
14
|
+
config.fetcher_agent_adapter = :<%= options[:fetcher_agent] %>
|
15
|
+
config.fetcher_agent_options = {}
|
16
|
+
config.queue_adapter = :<%= options[:queue] %>
|
17
|
+
config.queue_options = {}
|
18
|
+
end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
# Scruber::Core::Extensions::Loop.add_dictionary(:proxy_list, File.expand_path(File.dirname(__FILE__))+'/proxies.xml', :xml)
|
2
|
+
|
3
|
+
# Scruber::Helpers::ProxyRotator.configure do
|
4
|
+
# clean
|
5
|
+
# set_mode :round_robin
|
6
|
+
|
7
|
+
# loop :proxy_list do |ua|
|
8
|
+
# add ua['name'], tags: ua['tags'].split(',').map(&:strip)
|
9
|
+
# end
|
10
|
+
# end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
# Scruber::Core::Extensions::Loop.add_dictionary(:user_agents, File.expand_path(File.dirname(__FILE__))+'/user_agents.xml', :xml)
|
2
|
+
|
3
|
+
Scruber::Helpers::UserAgentRotator.configure do
|
4
|
+
clean
|
5
|
+
set_filter :all
|
6
|
+
|
7
|
+
add "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36", tags: [:desktop, :modern, :chrome]
|
8
|
+
|
9
|
+
# How to access user_agents dictionary
|
10
|
+
#
|
11
|
+
# loop :user_agents do |ua|
|
12
|
+
# add ua['name'], tags: ua['tags'].split(',').map(&:strip)
|
13
|
+
# end
|
14
|
+
end
|
data/lib/scruber/cli.rb
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
require "thor"
|
2
|
+
require "scruber"
|
3
|
+
require "scruber/cli/project_generator"
|
4
|
+
require "scruber/app_searcher"
|
5
|
+
|
6
|
+
module Scruber
  module CLI

    # Top-level Thor command set: `new`, `start` and `version`.
    class Root < Thor
      # Make Thor exit with a non-zero status when a command fails.
      def self.exit_on_failure?
        true
      end

      register(ProjectGenerator, "new", "new PATH", "Create new project")

      # The usage banner names the required NAME argument so that the help
      # output matches the method signature.
      desc 'start NAME', 'Run scraper'
      # Run the scraper called +name+.
      #
      # When APP_PATH is defined, the application file, every initializer
      # (in sorted order) and finally scrapers/NAME.rb are required. Otherwise
      # the call is delegated to Scruber::AppSearcher.exec_app.
      #
      # @param name [String] scraper file name without the `.rb` extension
      # @raise [Thor::Error] when the scraper file does not exist
      def start(name)
        return Scruber::AppSearcher.exec_app(name) unless defined?(APP_PATH)

        # raise ::Thor::Error, "ERROR: Scruber project not found." unless File.exist?(File.expand_path('config/application', Dir.pwd))
        scraper_path = File.expand_path('../../scrapers/'+name+'.rb', APP_PATH)
        raise ::Thor::Error, "ERROR: Scraper not found." unless File.exist?(scraper_path)

        say "booting..."
        require APP_PATH
        Dir[File.expand_path('../initializers/*.rb', APP_PATH)].sort.each do |initializer|
          require initializer
        end

        say "starting #{name}"
        require scraper_path
      end

      desc 'version', 'Display version'
      map %w[-v --version] => :version
      # Print the gem version. The constant is fully qualified so it can never
      # resolve to a VERSION constant reachable through the Thor superclass.
      def version
        say "Scruber #{Scruber::VERSION}"
      end
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module Scruber
  module Core
    # Value object holding, for each of the fetcher, the fetcher agent and the
    # queue, the selected adapter and an options hash.
    class Configuration
      attr_accessor :fetcher_adapter, :fetcher_options
      attr_accessor :fetcher_agent_adapter, :fetcher_agent_options
      attr_accessor :queue_adapter, :queue_options

      # Start from the built-in defaults: the Typhoeus fetcher, in-memory
      # fetcher agent and queue, and empty option hashes.
      def initialize
        @fetcher_adapter       = :typhoeus_fetcher
        @fetcher_options       = {}
        @fetcher_agent_adapter = :memory
        @fetcher_agent_options = {}
        @queue_adapter         = :memory
        @queue_options         = {}
      end

      # Apply overrides on top of the current settings. An adapter is replaced
      # only when its key is present in +options+; option hashes are merged
      # into the existing ones in place.
      #
      # @param options [Hash] any of :fetcher_adapter, :fetcher_options,
      #   :fetcher_agent_adapter, :fetcher_agent_options, :queue_adapter,
      #   :queue_options
      # @return [Hash] the queue options hash after merging
      def merge_options(options)
        @fetcher_adapter = options.fetch(:fetcher_adapter, @fetcher_adapter)
        @fetcher_options.merge!(options.fetch(:fetcher_options, {}))

        @fetcher_agent_adapter = options.fetch(:fetcher_agent_adapter, @fetcher_agent_adapter)
        @fetcher_agent_options.merge!(options.fetch(:fetcher_agent_options, {}))

        @queue_adapter = options.fetch(:queue_adapter, @queue_adapter)
        @queue_options.merge!(options.fetch(:queue_options, {}))
      end
    end
  end
end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
module Scruber
  module Core
    # Drives a scraping run: owns the queue and the fetcher, keeps the parser
    # callbacks registered per page type, and dispatches downloaded pages to
    # them. Extensions add DSL methods by being included into this class or
    # by registering method-name patterns via .register_method_missing.
    class Crawler
      attr_reader :queue, :fetcher

      # @param options [Hash] overrides merged into Scruber.configuration
      def initialize(options={})
        Scruber.configuration.merge_options(options)
        @callbacks_options = {}
        @callbacks = {}
        @on_complete_callbacks = {}
        @queue = Scruber::Queue.new
        @fetcher = Scruber::Fetcher.new
        load_extenstions
      end

      #
      # Run crawling.
      #
      # The block is evaluated in the crawler's context first. Then, while the
      # queue reports work, the fetcher is run against the queue and every
      # downloaded page whose page type has a registered parser is processed
      # and handed to that parser as (page, processed_page). Finally all
      # on-complete callbacks are executed.
      #
      # @param block [Proc] crawler body
      def run(&block)
        instance_eval(&block)
        while @queue.has_work?
          @fetcher.run @queue
          while (page = @queue.fetch_downloaded)
            page_type = page.page_type.to_sym
            callback = @callbacks[page_type]
            next unless callback

            processed_page = process_page(page, page_type)
            instance_exec(page, processed_page, &callback)
          end
        end
        @on_complete_callbacks.each do |_, callback|
          instance_exec(&callback)
        end
      end

      # Register the parser callback for a page type.
      #
      # @param page_type [#to_sym] page type the callback handles
      # @param options [Hash] callback options; :page_format is read when a
      #   page of this type is processed
      # @param block [Proc] called with (page, processed_page)
      def parser(page_type, options={}, &block)
        register_callback(page_type, options, &block)
      end

      # Dispatch unknown methods whose name matches a registered pattern to the
      # registered handler, executed in the crawler's context with
      # (method_sym, arguments). Returns the handler's own result, and falls
      # through to the default NoMethodError only when no pattern matches.
      def method_missing(method_sym, *arguments, &block)
        handler = dynamic_method_handler(method_sym)
        return super unless handler

        instance_exec(method_sym, arguments, &handler)
      end

      # Hook consulted by respond_to? and Object#method so that pattern-based
      # methods are reported consistently with method_missing.
      def respond_to_missing?(method_sym, include_private = false)
        !dynamic_method_handler(method_sym).nil? || super
      end

      class << self
        # Register a handler for every missing method whose name matches
        # +pattern+.
        #
        # @param pattern [Regexp] matched against the method name
        # @param block [Proc] receives (method_sym, arguments)
        def register_method_missing(pattern, &block)
          _registered_method_missings[pattern] = block
        end

        # @return [Hash{Regexp => Proc}] registered pattern handlers
        def _registered_method_missings
          @registered_method_missings ||= {}
        end
      end

      private

      # Store the callback and its options under the symbolized page type.
      def register_callback(page_type, options, &block)
        @callbacks_options[page_type.to_sym] = options || {}
        @callbacks[page_type.to_sym] = block
      end

      # Register (or replace) a named callback executed at the end of #run.
      def on_complete_callback(name, &block)
        @on_complete_callbacks[name] = block
      end

      # Convert a downloaded page using the :page_format option registered for
      # its page type (nil when the option is absent).
      def process_page(page, page_type)
        page_format = @callbacks_options[page_type].fetch(:page_format){ nil }
        Scruber::Core::PageFormat.process(page, page_format)
      end

      # Call .register on every known extension class. Array() guards against
      # a nil list when no extension class has been defined yet.
      def load_extenstions
        Array(Scruber::Core::Extensions::Base.descendants).each(&:register)
      end

      # @return [Proc, nil] handler of the first registered pattern matching
      #   +method_sym+, or nil when none matches
      def dynamic_method_handler(method_sym)
        _pattern, handler = Scruber::Core::Crawler._registered_method_missings.find do |pattern, _|
          method_sym.to_s =~ pattern
        end
        handler
      end
    end
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module Scruber
  module Core
    module Extensions
      # Base class for crawler extensions. A subclass supplies a CoreMethods
      # module; .register mixes that module into Scruber::Core::Crawler.
      # Direct subclasses are recorded as they are defined.
      class Base
        # Default (empty) set of crawler methods; subclasses define their own
        # CoreMethods constant.
        module CoreMethods

        end

        class << self
          # Include this extension's CoreMethods module into the crawler.
          def register
            Scruber::Core::Crawler.include self.const_get(:CoreMethods)
          end

          # Ruby hook: remember every class that inherits from the receiver.
          def inherited(subclass)
            super
            descendants << subclass
          end

          # @return [Array<Class>] classes recorded by .inherited; an empty
          #   array (never nil) when nothing has subclassed the receiver yet
          def descendants
            @descendants ||= []
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
module Scruber
  module Core
    module Extensions
      # Crawler extension that writes rows to one or more CSV files. Files are
      # registered under an id (:default unless stated otherwise) and are
      # closed by an on-complete callback of the crawler.
      #
      # NOTE(review): relies on the CSV library being loaded elsewhere; no
      # `require 'csv'` is visible in this file -- confirm.
      class CsvOutput < Base
        module CoreMethods
          # Open a CSV file for output and arrange for all CSV files to be
          # closed when crawling completes.
          #
          # @param path [String] destination file
          # @param options [Hash] :file_id (default :default) names the file,
          #   :mode (default 'wb') is the open mode; the remaining entries are
          #   passed to CSV.open. The caller's hash is not modified.
          def csv_file(path, options={})
            file_id = options.fetch(:file_id) { :default }.to_sym
            csv_options = options.reject { |key, _| key == :file_id }
            Scruber::Core::Extensions::CsvOutput.register_csv file_id, path, csv_options
            on_complete_callback :close_csv_files do
              Scruber::Core::Extensions::CsvOutput.close_all
            end
          end

          # Append a row to the :default CSV file.
          #
          # @param fields [Array] row values
          def csv_out(fields)
            Scruber::Core::Extensions::CsvOutput.csv_out :default, fields
          end

          # When mixed into the crawler, register a dynamic `csv_<id>_file`
          # method. Calling it opens the file under <id> and defines a matching
          # `csv_<id>_out` method on the crawler class.
          def self.included(base)
            file_method_pattern = /\Acsv_(\w+)_file\Z/
            Scruber::Core::Crawler.register_method_missing(file_method_pattern) do |meth, args|
              file_id = meth.to_s[file_method_pattern, 1].to_sym
              path, options = args
              options = {} if options.nil?
              csv_file path, options.merge({file_id: file_id})
              Scruber::Core::Crawler.class_eval do
                define_method "csv_#{file_id}_out".to_sym do |fields|
                  Scruber::Core::Extensions::CsvOutput.csv_out(file_id, fields)
                end
              end
            end
          end
        end

        class << self
          # Append a row to a registered file.
          #
          # @param file_id [#to_sym] id the file was registered under
          # @param fields [Array] row values
          # @raise [ArgumentError] when no file is registered under +file_id+
          def csv_out(file_id, fields)
            unless _registered_files.key?(file_id.to_sym)
              raise ArgumentError, "file #{file_id.inspect} not registered"
            end

            _registered_files[file_id.to_sym] << fields
          end

          # Open +path+ and remember the handle under +file_id+.
          #
          # @param options [Hash] :mode (default 'wb') plus CSV options. The
          #   hash is left untouched, and the CSV options are passed as
          #   keywords because CSV.open no longer accepts a positional options
          #   hash on Ruby 3.
          def register_csv(file_id, path, options)
            mode = options.fetch(:mode){ 'wb' }
            csv_options = options.reject { |key, _| key == :mode }
            _registered_files[file_id] = CSV.open(path, mode, **csv_options)
          end

          # @return [Hash{Symbol => CSV}] open files by id
          def _registered_files
            @registered_files ||= {}
          end

          # Close every registered file and forget all of them.
          def close_all
            _registered_files.each_value(&:close)
            @registered_files = {}
          end
        end

      end
    end
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
module Scruber
  module Core
    module Extensions
      # Crawler extension for iterating over named dictionaries (files that
      # were registered together with their type).
      class Loop < Base
        module CoreMethods
          # Iterate over a registered dictionary, running the block in the
          # context of the receiver for every yielded entry.
          #
          # @param dictionary [#to_sym] name used in Loop.add_dictionary
          # @param options [Hash] passed through to the dictionary reader
          def loop(dictionary, options={}, &block)
            Scruber::Core::Extensions::Loop.loop(dictionary, options) do |*entry|
              instance_exec(*entry, &block)
            end
          end
        end

        class << self
          # Read the registered dictionary and yield each object produced by
          # Scruber::Helpers::DictionaryReader.read.
          #
          # @raise [ArgumentError] when +dictionary+ has not been registered
          def loop(dictionary, options={})
            dictionary_id = dictionary.to_sym
            unless _registered_dictionaries.key?(dictionary_id)
              raise ArgumentError, "dictionary not registered, available dictionaries #{_registered_dictionaries.keys.inspect}"
            end

            source = _registered_dictionaries[dictionary_id]
            Scruber::Helpers::DictionaryReader.read(source[:file_path], source[:file_type], options) do |obj|
              yield obj
            end
          end

          # Register a dictionary file under a name.
          #
          # @param name [#to_sym] dictionary name
          # @param file_path [String] location of the file
          # @param file_type [Symbol] type handed to the dictionary reader
          def add_dictionary(name, file_path, file_type)
            _registered_dictionaries[name.to_sym] = { file_path: file_path, file_type: file_type }
          end

          # @return [Hash{Symbol => Hash}] registered dictionaries by name
          def _registered_dictionaries
            @registered_dictionaries ||= {}
          end
        end

      end
    end
  end
end
|