scruber 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/.rspec +2 -0
- data/.travis.yml +5 -0
- data/Gemfile +6 -0
- data/LICENSE.txt +21 -0
- data/README.md +39 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/exe/scruber +4 -0
- data/lib/scruber/app_searcher.rb +31 -0
- data/lib/scruber/cli/project_generator.rb +47 -0
- data/lib/scruber/cli/templates/Gemfile.tt +6 -0
- data/lib/scruber/cli/templates/application.tt +18 -0
- data/lib/scruber/cli/templates/bin/scruber.tt +6 -0
- data/lib/scruber/cli/templates/boot.tt +3 -0
- data/lib/scruber/cli/templates/gitignore.tt +12 -0
- data/lib/scruber/cli/templates/initializers/proxies.tt +10 -0
- data/lib/scruber/cli/templates/initializers/user_agents.tt +14 -0
- data/lib/scruber/cli/templates/scrapers/sample.tt +7 -0
- data/lib/scruber/cli.rb +40 -0
- data/lib/scruber/core/configuration.rb +30 -0
- data/lib/scruber/core/crawler.rb +92 -0
- data/lib/scruber/core/extensions/base.rb +26 -0
- data/lib/scruber/core/extensions/csv_output.rb +62 -0
- data/lib/scruber/core/extensions/loop.rb +39 -0
- data/lib/scruber/core/page_format/base.rb +11 -0
- data/lib/scruber/core/page_format/html.rb +13 -0
- data/lib/scruber/core/page_format/xml.rb +13 -0
- data/lib/scruber/core/page_format.rb +33 -0
- data/lib/scruber/fetcher.rb +34 -0
- data/lib/scruber/fetcher_adapters/abstract_adapter.rb +119 -0
- data/lib/scruber/fetcher_adapters/typhoeus_fetcher.rb +78 -0
- data/lib/scruber/helpers/dictionary_reader/csv.rb +27 -0
- data/lib/scruber/helpers/dictionary_reader/xml.rb +23 -0
- data/lib/scruber/helpers/dictionary_reader.rb +33 -0
- data/lib/scruber/helpers/fetcher_agent.rb +40 -0
- data/lib/scruber/helpers/fetcher_agent_adapters/abstract_adapter.rb +69 -0
- data/lib/scruber/helpers/fetcher_agent_adapters/memory.rb +41 -0
- data/lib/scruber/helpers/proxy_rotator.rb +125 -0
- data/lib/scruber/helpers/user_agent_rotator.rb +91 -0
- data/lib/scruber/queue.rb +34 -0
- data/lib/scruber/queue_adapters/abstract_adapter.rb +112 -0
- data/lib/scruber/queue_adapters/memory.rb +70 -0
- data/lib/scruber/version.rb +3 -0
- data/lib/scruber.rb +69 -0
- data/scruber.gemspec +43 -0
- metadata +233 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 6aecb299dd5c70bfe864f65d2faed704cdc3cde0
|
4
|
+
data.tar.gz: f0697ca03b0f1552e03fb1ee70d0265d5b8bed04
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 2f52170ba8af4152b88e694537fa9e55156fa6d082c1e9ec409ae46808dc9ca28ff47ed241e8072c0edbba13b3e0be9f00504aef4542da52c3a280c282d8df71
|
7
|
+
data.tar.gz: 580d4c3840526dba3e62826433f8dddd09cdc42f569a2db05beb69b3428bfbd0fea92dc2faa0367175870327605f27f960e04be3d65d5c2dbd3c44c61719252d
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2017 Ivan Goncharov
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
# Scruber
|
2
|
+
|
3
|
+
Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/scruber`. To experiment with that code, run `bin/console` for an interactive prompt.
|
4
|
+
|
5
|
+
TODO: Delete this and the text above, and describe your gem
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
Add this line to your application's Gemfile:
|
10
|
+
|
11
|
+
```ruby
|
12
|
+
gem 'scruber'
|
13
|
+
```
|
14
|
+
|
15
|
+
And then execute:
|
16
|
+
|
17
|
+
$ bundle
|
18
|
+
|
19
|
+
Or install it yourself as:
|
20
|
+
|
21
|
+
$ gem install scruber
|
22
|
+
|
23
|
+
## Usage
|
24
|
+
|
25
|
+
TODO: Write usage instructions here
|
26
|
+
|
27
|
+
## Development
|
28
|
+
|
29
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
30
|
+
|
31
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
32
|
+
|
33
|
+
## Contributing
|
34
|
+
|
35
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/[USERNAME]/scruber.
|
36
|
+
|
37
|
+
## License
|
38
|
+
|
39
|
+
The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
|
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "scruber"
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
# require "pry"
|
11
|
+
# Pry.start
|
12
|
+
|
13
|
+
# Start an interactive IRB session; "scruber" has already been required
# above, so the gem's constants are available at the prompt.
require "irb"
|
14
|
+
IRB.start(__FILE__)
|
data/bin/setup
ADDED
data/exe/scruber
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
require "pathname"

module Scruber
  # Finds a project-local scruber executable by walking up from the current
  # working directory, and hands control over to it when one is found.
  module AppSearcher

    extend self

    # Path of the Ruby interpreter used to run the project executable.
    RUBY = Gem.ruby
    # Relative paths that are probed in every visited directory.
    EXECUTABLES = ["bin/scruber"].freeze

    #
    # Search the current directory and each of its ancestors for one of
    # EXECUTABLES. On the first hit the current process is replaced
    # (Kernel#exec) by RUBY running that executable with the original ARGV.
    # When the filesystem root is reached without a hit, the original working
    # directory is restored and the method returns nil.
    #
    # @param name [String] accepted for the caller's convenience; the search
    #   itself does not use it (arguments are forwarded through ARGV)
    # @return [nil]
    def exec_app(name)
      original_cwd = Dir.pwd

      loop do
        if (exe = find_executable)
          exec RUBY, exe, *ARGV
          break # non reachable, hack to be able to stub exec in the test suite
        end

        # If we exhaust the search there is no executable, this could be a
        # call to generate a new application, so restore the original cwd.
        if Pathname.new(Dir.pwd).root?
          Dir.chdir(original_cwd)
          return
        end

        # Otherwise keep moving upwards in search of an executable.
        Dir.chdir("..")
      end
    end

    #
    # @return [String, nil] the first entry of EXECUTABLES that exists as a
    #   regular file relative to the current working directory
    def find_executable
      EXECUTABLES.find { |exe| File.file?(exe) }
    end
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require "thor"
|
2
|
+
require 'thor/group'
|
3
|
+
require 'fileutils'
|
4
|
+
|
5
|
+
module Scruber
  module CLI
    # Thor generator that scaffolds a new scruber project at PATH.
    # As a Thor::Group, its public instance methods run in definition order:
    # create the directory, render the templates, run bundler, print a hint.
    class ProjectGenerator < Thor::Group
      include Thor::Actions

      argument :path
      class_option :queue, :default => 'memory', :aliases => '-q'
      class_option :fetcher_agent, :default => 'memory', :aliases => '-fa'

      # @return [String] directory that holds the *.tt templates
      def self.source_root
        File.dirname(__FILE__) + '/templates'
      end

      # Create the project directory.
      #
      # @raise [Thor::Error] when something already exists at path
      def create_directories
        raise ::Thor::Error, "ERROR: #{path} already exists." if File.exist?(path)
        say "Creating scruber project at #{path}"
        FileUtils.mkdir_p(path)
      end

      # Render every template into the new project. Each destination is
      # rendered exactly once (config/boot.rb used to be rendered twice).
      def create_files
        template 'Gemfile.tt', path+'/Gemfile'
        template 'gitignore.tt', path+'/.gitignore'
        template 'bin/scruber.tt', path+'/bin/scruber'
        template 'application.tt', path+'/config/application.rb'
        template 'boot.tt', path+'/config/boot.rb'
        template 'initializers/proxies.tt', path+'/config/initializers/proxies.rb'
        template 'initializers/user_agents.tt', path+'/config/initializers/user_agents.rb'
        template 'scrapers/sample.tt', path+'/scrapers/sample.rb'
      end

      # Run `bundle` inside the freshly generated project.
      def init_project
        inside path do
          run "bundle"
        end
      end

      # Tell the user how to run the generated sample scraper.
      def print_instructions
        say "Run `scruber start sample` to run sample scraper"
      end
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require File.expand_path('../boot', __FILE__)
|
2
|
+
|
3
|
+
Bundler.require(:default)
|
4
|
+
|
5
|
+
# Project-wide Scruber settings.
# NOTE(review): this hunk appears to be the generator's application template;
# the two ERB tags below are filled from the generator's :fetcher_agent and
# :queue options (both declared with the default 'memory') - confirm.
Scruber.configure do |config|
|
6
|
+
config.fetcher_adapter = :typhoeus_fetcher
|
7
|
+
config.fetcher_options = {
|
8
|
+
max_concurrency: 1,
|
9
|
+
max_retry_times: 5,
|
10
|
+
retry_delays: [1,2,2,4,4],
|
11
|
+
followlocation: false,
|
12
|
+
request_timeout: 15,
|
13
|
+
}
|
14
|
+
config.fetcher_agent_adapter = :<%= options[:fetcher_agent] %>
|
15
|
+
config.fetcher_agent_options = {}
|
16
|
+
config.queue_adapter = :<%= options[:queue] %>
|
17
|
+
config.queue_options = {}
|
18
|
+
end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
# Scruber::Core::Extensions::Loop.add_dictionary(:proxy_list, File.expand_path(File.dirname(__FILE__))+'/proxies.xml', :xml)
|
2
|
+
|
3
|
+
# Scruber::Helpers::ProxyRotator.configure do
|
4
|
+
# clean
|
5
|
+
# set_mode :round_robin
|
6
|
+
|
7
|
+
# loop :proxy_list do |ua|
|
8
|
+
# add ua['name'], tags: ua['tags'].split(',').map(&:strip)
|
9
|
+
# end
|
10
|
+
# end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
# Scruber::Core::Extensions::Loop.add_dictionary(:user_agents, File.expand_path(File.dirname(__FILE__))+'/user_agents.xml', :xml)
|
2
|
+
|
3
|
+
# Registers the user agent strings available to the project.
# NOTE(review): `clean`, `set_filter` and `add` are UserAgentRotator DSL calls
# defined outside this view - confirm their exact semantics there.
Scruber::Helpers::UserAgentRotator.configure do
|
4
|
+
clean
|
5
|
+
set_filter :all
|
6
|
+
|
7
|
+
add "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36", tags: [:desktop, :modern, :chrome]
|
8
|
+
|
9
|
+
# How to access the user_agents dictionary (register it with add_dictionary first, see the top of this file)
|
10
|
+
#
|
11
|
+
# loop :user_agents do |ua|
|
12
|
+
# add ua['name'], tags: ua['tags'].split(',').map(&:strip)
|
13
|
+
# end
|
14
|
+
end
|
data/lib/scruber/cli.rb
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
require "thor"
|
2
|
+
require "scruber"
|
3
|
+
require "scruber/cli/project_generator"
|
4
|
+
require "scruber/app_searcher"
|
5
|
+
|
6
|
+
module Scruber
  module CLI

    # Top-level Thor command set: `new`, `start` and `version`.
    class Root < Thor
      # Make Thor exit with a failure status when a command raises Thor::Error.
      def self.exit_on_failure?
        true
      end

      register(ProjectGenerator, "new", "new PATH", "Create new project")

      desc 'start', 'Run scraper'
      # Boot the project and load scrapers/<name>.rb. Outside a booted
      # project (APP_PATH undefined) the call is delegated to
      # Scruber::AppSearcher, which looks for the project executable.
      #
      # @param name [String] scraper file name without the .rb extension
      # @raise [Thor::Error] when the scraper file does not exist
      def start(name)
        return Scruber::AppSearcher.exec_app(name) unless defined?(APP_PATH)

        # raise ::Thor::Error, "ERROR: Scruber project not found." unless File.exist?(File.expand_path('config/application', Dir.pwd))
        scraper_file = File.expand_path('../../scrapers/'+name+'.rb', APP_PATH)
        raise ::Thor::Error, "ERROR: Scraper not found." unless File.exist?(scraper_file)

        say "booting..."
        require APP_PATH
        # Initializers are loaded in file name order.
        initializers = Dir[File.expand_path('../initializers/*.rb', APP_PATH)].sort
        initializers.each { |initializer| require initializer }

        say "starting #{name}"
        require scraper_file
      end

      desc 'version', 'Display version'
      map %w[-v --version] => :version
      # Print the gem version.
      def version
        say "Scruber #{VERSION}"
      end
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module Scruber
  module Core
    # Holds the adapter choices and per-adapter option hashes for the
    # fetcher, the fetcher agent and the queue.
    class Configuration
      attr_accessor :fetcher_adapter, :fetcher_options,
                    :fetcher_agent_adapter, :fetcher_agent_options,
                    :queue_adapter, :queue_options

      # Start from the built-in defaults: typhoeus fetcher, in-memory fetcher
      # agent and queue, and empty option hashes.
      def initialize
        @fetcher_adapter, @fetcher_options = :typhoeus_fetcher, {}
        @fetcher_agent_adapter, @fetcher_agent_options = :memory, {}
        @queue_adapter, @queue_options = :memory, {}
      end

      # Overlay the given settings on the current ones. Adapter names are
      # replaced when their key is present; option hashes are merged in place.
      #
      # @param options [Hash] any subset of the six configuration keys
      # @return [Hash] the merged queue options
      def merge_options(options)
        @fetcher_adapter = options.fetch(:fetcher_adapter, @fetcher_adapter)
        @fetcher_options.merge!(options.fetch(:fetcher_options, {}))
        @fetcher_agent_adapter = options.fetch(:fetcher_agent_adapter, @fetcher_agent_adapter)
        @fetcher_agent_options.merge!(options.fetch(:fetcher_agent_options, {}))
        @queue_adapter = options.fetch(:queue_adapter, @queue_adapter)
        @queue_options.merge!(options.fetch(:queue_options, {}))
      end
    end
  end
end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
module Scruber
  module Core
    # Drives a scraping run: owns the page queue and the fetcher, keeps the
    # parser callbacks registered per page type, and dispatches dynamically
    # named methods to handlers registered by extensions.
    class Crawler
      attr_reader :queue, :fetcher

      #
      # @param options [Hash] overrides merged into Scruber.configuration
      def initialize(options={})
        Scruber.configuration.merge_options(options)
        @callbacks_options = {}
        @callbacks = {}
        @on_complete_callbacks = {}
        @queue = Scruber::Queue.new
        @fetcher = Scruber::Fetcher.new
        load_extensions
      end

      #
      # Run crawling.
      #
      # The block is evaluated in the crawler's context first (it registers
      # parsers and enqueues pages). Then, while the queue has work, pages are
      # fetched and each downloaded page is passed to the parser registered
      # for its page type; pages without a registered parser are skipped.
      # Finally every on-complete callback is executed.
      #
      # @param block [Proc] crawler body
      def run(&block)
        instance_eval(&block)
        while @queue.has_work?
          @fetcher.run @queue
          while (page = @queue.fetch_downloaded)
            page_type = page.page_type.to_sym
            callback = @callbacks[page_type]
            next unless callback

            processed_page = process_page(page, page_type)
            instance_exec(page, processed_page, &callback)
          end
        end
        @on_complete_callbacks.each_value { |callback| instance_exec(&callback) }
      end

      #
      # Register the parser block for a page type.
      #
      # @param page_type [#to_sym] page type handled by the block
      # @param options [Hash] parser options; :page_format is read when the
      #   page is processed
      # @param block [Proc] called with the page and its processed form
      def parser(page_type, options={}, &block)
        register_callback(page_type, options, &block)
      end

      #
      # Dispatch unknown methods to the first handler whose pattern matches
      # the method name. The handler runs in the crawler's context with the
      # method name and the argument list, and its result is returned.
      def method_missing(method_sym, *arguments, &block)
        _pattern, handler = Scruber::Core::Crawler._registered_method_missings.find do |(pattern, _)|
          method_sym.to_s =~ pattern
        end
        return super unless handler

        instance_exec(method_sym, arguments, &handler)
      end

      #
      # Keeps respond_to? and Object#method consistent with method_missing.
      def respond_to_missing?(method_sym, include_private = false)
        name = method_sym.to_s
        Scruber::Core::Crawler._registered_method_missings.each_key.any? { |pattern| name =~ pattern } || super
      end

      class << self
        #
        # Register a handler for dynamically named crawler methods.
        #
        # @param pattern [Regexp] matched against the called method's name
        # @param block [Proc] receives the method name and the argument list
        def register_method_missing(pattern, &block)
          _registered_method_missings[pattern] = block
        end

        # @return [Hash{Regexp => Proc}] registered dynamic-method handlers
        def _registered_method_missings
          @registered_method_missings ||= {}
        end
      end

      private

        # Store the parser block and its options under the page type.
        def register_callback(page_type, options, &block)
          @callbacks_options[page_type.to_sym] = options || {}
          @callbacks[page_type.to_sym] = block
        end

        # Register a block to run after crawling finishes; registering the
        # same name again replaces the earlier block.
        def on_complete_callback(name, &block)
          @on_complete_callbacks[name] = block
        end

        # Convert a downloaded page using the :page_format option of the
        # parser registered for page_type (nil when the option is absent).
        def process_page(page, page_type)
          page_format = @callbacks_options[page_type].fetch(:page_format, nil)
          Scruber::Core::PageFormat.process(page, page_format)
        end

        # Let every known extension register itself with the crawler.
        # Array() guards against an extension registry that is still nil.
        def load_extensions
          Array(Scruber::Core::Extensions::Base.descendants).each(&:register)
        end
    end
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module Scruber
  module Core
    module Extensions
      # Base class for crawler extensions. A subclass supplies a CoreMethods
      # module; registering the subclass mixes that module into the Crawler.
      class Base
        # Default (empty) set of methods mixed into the crawler.
        module CoreMethods

        end

        class << self
          # Include this extension's CoreMethods module into the Crawler.
          def register
            Scruber::Core::Crawler.include self.const_get(:CoreMethods)
          end

          # Ruby hook: remember each class that inherits directly from the
          # receiver.
          def inherited(subclass)
            super
            descendants << subclass
          end

          # Only direct subclasses of the receiver are tracked; a subclass of
          # a subclass is recorded on its own parent.
          #
          # @return [Array<Class>] recorded subclasses, empty when there are
          #   none
          def descendants
            @descendants ||= []
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
module Scruber
  module Core
    module Extensions
      # Crawler extension for writing rows to one or more CSV files, each
      # identified by a file id (:default unless stated otherwise).
      class CsvOutput < Base
        module CoreMethods
          #
          # Open a CSV file for output and schedule all CSV files to be
          # closed when crawling completes.
          #
          # @param path [String] destination file
          # @param options [Hash] :file_id selects the id (default :default);
          #   the remaining keys are passed on to register_csv. The caller's
          #   hash is not modified.
          def csv_file(path, options={})
            options = options.dup
            file_id = options.delete(:file_id) { :default }.to_sym
            Scruber::Core::Extensions::CsvOutput.register_csv file_id, path, options
            on_complete_callback :close_csv_files do
              Scruber::Core::Extensions::CsvOutput.close_all
            end
          end

          #
          # Append a row to the :default CSV file.
          #
          # @param fields [Array] row values
          def csv_out(fields)
            Scruber::Core::Extensions::CsvOutput.csv_out :default, fields
          end

          # When mixed in, teach the crawler the dynamic `csv_<id>_file`
          # methods: calling one opens the file under <id> and defines the
          # matching `csv_<id>_out` writer on the Crawler.
          def self.included(base)
            file_method = /\Acsv_(\w+)_file\Z/
            Scruber::Core::Crawler.register_method_missing file_method do |meth, args|
              file_id = meth.to_s[file_method, 1].to_sym
              path, options = args
              options = {} if options.nil?
              csv_file path, options.merge({file_id: file_id})
              Scruber::Core::Crawler.class_eval do
                define_method "csv_#{file_id}_out".to_sym do |fields|
                  Scruber::Core::Extensions::CsvOutput.csv_out(file_id, fields)
                end
              end
            end
          end
        end

        class << self
          #
          # Append a row to a registered file.
          #
          # @param file_id [#to_sym] id used when the file was registered
          # @param fields [Array] row values
          # @raise [ArgumentError] when no file is registered under file_id
          def csv_out(file_id, fields)
            if _registered_files.key?(file_id.to_sym)
              _registered_files[file_id.to_sym] << fields
            else
              raise ArgumentError, "file #{file_id.inspect} not registered"
            end
          end

          #
          # Open path and remember the handle under file_id.
          #
          # @param file_id [Symbol] registry key
          # @param path [String] destination file
          # @param options [Hash] :mode is the open mode (default 'wb'); the
          #   remaining keys go to CSV.open as keyword options. The caller's
          #   hash is not modified.
          def register_csv(file_id, path, options)
            options = options.dup
            mode = options.delete(:mode) { 'wb' }
            # Close a handle already registered under this id so it is not
            # leaked when the id is reused.
            previous = _registered_files[file_id]
            previous.close if previous
            # CSV.open takes its options as keywords; a positional hash
            # raises ArgumentError on Ruby 3.
            _registered_files[file_id] = CSV.open(path, mode, **options)
          end

          # @return [Hash{Symbol => CSV}] open CSV handles by file id
          def _registered_files
            @registered_files ||= {}
          end

          # Close every registered file and empty the registry.
          def close_all
            _registered_files.each_value(&:close)
            @registered_files = {}
          end
        end

      end
    end
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
module Scruber
  module Core
    module Extensions
      # Crawler extension that iterates over the entries of a named,
      # file-backed dictionary.
      class Loop < Base
        module CoreMethods
          #
          # Run the block in the crawler's context for every entry of a
          # registered dictionary.
          #
          # @param dictionary [#to_sym] name given to add_dictionary
          # @param options [Hash] passed through to the dictionary reader
          def loop(dictionary, options={}, &block)
            Scruber::Core::Extensions::Loop.loop(dictionary, options) { |*entry| instance_exec(*entry, &block) }
          end
        end

        class << self
          #
          # Yield every entry read from a registered dictionary.
          #
          # @param dictionary [#to_sym] name given to add_dictionary
          # @param options [Hash] passed through to the dictionary reader
          # @raise [ArgumentError] when the dictionary was never registered
          def loop(dictionary, options={})
            dictionary_id = dictionary.to_sym
            unless _registered_dictionaries.key?(dictionary_id)
              raise ArgumentError, "dictionary not registered, available dictionaries #{_registered_dictionaries.keys.inspect}"
            end

            source = _registered_dictionaries[dictionary_id]
            Scruber::Helpers::DictionaryReader.read(source[:file_path], source[:file_type], options) { |obj| yield obj }
          end

          #
          # Register a dictionary file under a name.
          #
          # @param name [#to_sym] dictionary name
          # @param file_path [String] location of the dictionary file
          # @param file_type [Symbol] reader type handed to the dictionary reader
          def add_dictionary(name, file_path, file_type)
            _registered_dictionaries[name.to_sym] = { file_path: file_path, file_type: file_type }
          end

          # @return [Hash{Symbol => Hash}] registered dictionaries by name
          def _registered_dictionaries
            @registered_dictionaries ||= {}
          end
        end

      end
    end
  end
end
|