metacrunch 3.1.4 → 4.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/metacrunch/db.rb DELETED
@@ -1,8 +0,0 @@
1
- require "sequel"
2
-
3
- module Metacrunch
4
- class Db
5
- require_relative "db/reader"
6
- require_relative "db/writer"
7
- end
8
- end
@@ -1,33 +0,0 @@
1
- require "metacrunch/db"
2
-
3
- module Metacrunch
4
- class Db::Reader
5
-
6
- def initialize(database_connection_or_url, dataset_proc, options = {})
7
- @rows_per_fetch = options.delete(:rows_per_fetch) || 1000
8
-
9
- @db = if database_connection_or_url.is_a?(String)
10
- Sequel.connect(database_connection_or_url, options)
11
- else
12
- database_connection_or_url
13
- end
14
-
15
- @dataset = dataset_proc.call(@db).unlimited
16
-
17
- unless @dataset.opts[:order]
18
- raise ArgumentError, "Metacrunch::Db::Reader requires the dataset be ordered."
19
- end
20
- end
21
-
22
- def each(&block)
23
- return enum_for(__method__) unless block_given?
24
-
25
- @dataset.paged_each(rows_per_fetch: @rows_per_fetch, strategy: :filter) do |row|
26
- yield(row)
27
- end
28
-
29
- self
30
- end
31
-
32
- end
33
- end
@@ -1,55 +0,0 @@
1
- require "metacrunch/db"
2
-
3
- module Metacrunch
4
- class Db::Writer
5
-
6
- def initialize(database_connection_or_url, dataset_proc, options = {})
7
- @use_upsert = options.delete(:use_upsert) || false
8
- @id_key = options.delete(:id_key) || :id
9
- @isolation_level = options.delete(:isolation_level) || :repeatable
10
- @transaction_retries = options.delete(:transaction_retries) || 5
11
-
12
- @db = if database_connection_or_url.is_a?(String)
13
- Sequel.connect(database_connection_or_url, options)
14
- else
15
- database_connection_or_url
16
- end
17
-
18
- @dataset = dataset_proc.call(@db)
19
- end
20
-
21
- def write(data)
22
- if data.is_a?(Array)
23
- @db.transaction(isolation: @isolation_level, num_retries: @transaction_retries) do
24
- data.each{|d| insert_or_upsert(d) }
25
- end
26
- else
27
- insert_or_upsert(data)
28
- end
29
- end
30
-
31
- def close
32
- @db.disconnect
33
- end
34
-
35
- private
36
-
37
- def insert_or_upsert(data)
38
- @use_upsert ? upsert(data) : insert(data)
39
- end
40
-
41
- def insert(data)
42
- @dataset.insert(data) if data
43
- end
44
-
45
- def upsert(data)
46
- if data
47
- rec = @dataset.where(@id_key => data[@id_key])
48
- if 1 != rec.update(data)
49
- insert(data)
50
- end
51
- end
52
- end
53
-
54
- end
55
- end
data/lib/metacrunch/fs.rb DELETED
@@ -1,6 +0,0 @@
1
- module Metacrunch
2
- module Fs
3
- require_relative "fs/reader"
4
- require_relative "fs/entry"
5
- end
6
- end
@@ -1,17 +0,0 @@
1
- module Metacrunch
2
- class Fs::Entry
3
-
4
- attr_reader :filename, :archive_filename, :contents
5
-
6
- def initialize(filename:, archive_filename: nil, contents: nil)
7
- @filename = filename
8
- @archive_filename = archive_filename.presence
9
- @contents = contents
10
- end
11
-
12
- def from_archive?
13
- @archive_filename != nil
14
- end
15
-
16
- end
17
- end
@@ -1,63 +0,0 @@
1
- require "metacrunch/fs"
2
- require "rubygems/package"
3
-
4
- module Metacrunch
5
- class Fs::Reader
6
- include Metacrunch::ParallelProcessableReader
7
-
8
- def initialize(filenames = nil)
9
- @filenames = [*filenames].map{|f| f.presence}.compact
10
- end
11
-
12
- def each(&block)
13
- return enum_for(__method__) unless block_given?
14
-
15
- offset = 0 + process_index
16
-
17
- while offset < @filenames.count do
18
- _filename = @filenames[offset]
19
-
20
- if is_archive?(_filename)
21
- read_archive(_filename, &block)
22
- else
23
- read_regular_file(_filename, &block)
24
- end
25
-
26
- offset += number_of_processes
27
- end
28
- end
29
-
30
- private
31
-
32
- def is_archive?(filename)
33
- filename.ends_with?(".tar") || filename.ends_with?(".tar.gz") || filename.ends_with?(".tgz")
34
- end
35
-
36
- def is_gzip_file?(filename)
37
- filename.ends_with?(".gz") || filename.ends_with?(".tgz")
38
- end
39
-
40
- def read_regular_file(filename, &block)
41
- if File.file?(filename)
42
- io = is_gzip_file?(filename) ? Zlib::GzipReader.open(filename) : File.open(filename, "r")
43
- yield Fs::Entry.new(filename: filename, archive_filename: nil, contents: io.read)
44
- end
45
- end
46
-
47
- def read_archive(filename, &block)
48
- io = is_gzip_file?(filename) ? Zlib::GzipReader.open(filename) : File.open(filename, "r")
49
- tarReader = Gem::Package::TarReader.new(io)
50
-
51
- tarReader.each do |_tar_entry|
52
- if _tar_entry.file?
53
- yield Fs::Entry.new(
54
- filename: filename,
55
- archive_filename: _tar_entry.full_name,
56
- contents: _tar_entry.read
57
- )
58
- end
59
- end
60
- end
61
-
62
- end
63
- end
@@ -1,102 +0,0 @@
1
- module Metacrunch
2
- class Job::Dsl::OptionSupport
3
-
4
- def register_options(args, require_args: false, &block)
5
- options = {}
6
- registry.instance_eval(&block)
7
-
8
- registry.each do |key, opt_def|
9
- # Set default value
10
- options[key] = opt_def[:default]
11
-
12
- # Register with OptionParser
13
- if opt_def[:args].present?
14
- option = parser.define(*opt_def[:args]) { |value| options[key] = value }
15
-
16
- option.desc << "REQUIRED" if opt_def[:required]
17
- option.desc << "DEFAULT: #{opt_def[:default]}" if opt_def[:default].present?
18
-
19
- parser_options[key] = option
20
- end
21
- end
22
-
23
- # Finally parse CLI options with OptionParser
24
- args = parser.parse(args || [])
25
-
26
- # Make sure required options are present
27
- ensure_required_options!(options)
28
-
29
- # Make sure args are present if required
30
- ensure_required_args!(args) if require_args
31
-
32
- options
33
- end
34
-
35
- private
36
-
37
- def parser
38
- @parser ||= OptionParser.new do |parser|
39
- parser.banner = "Usage: metacrunch [options] JOB_FILE @@ [job-options] [ARGS]\nJob options:"
40
- end
41
- end
42
-
43
- def parser_options
44
- @parser_options ||= {}
45
- end
46
-
47
- def registry
48
- @registry ||= OptionRegistry.new
49
- end
50
-
51
- def ensure_required_options!(options)
52
- registry.each do |key, opt_def|
53
- if opt_def[:required] && options[key].blank?
54
- long_option = parser_options[key].long.try(:[], 0)
55
- short_option = parser_options[key].short.try(:[], 0)
56
-
57
- puts "Error: Required job option `#{long_option || short_option}` missing."
58
- puts parser.help
59
-
60
- exit(1)
61
- end
62
- end
63
- end
64
-
65
- def ensure_required_args!(args)
66
- if args.blank?
67
- puts "Error: Required ARGS are missing."
68
- puts parser.help
69
-
70
- exit(1)
71
- end
72
- end
73
-
74
- private
75
-
76
- class OptionRegistry
77
-
78
- def add(name, *args, default: nil, required: false)
79
- if default && required
80
- raise ArgumentError, "You can't use `default` and `required` option at the same time."
81
- end
82
-
83
- options[name.to_sym] = {
84
- args: args,
85
- default: default,
86
- required: required
87
- }
88
- end
89
-
90
- def each(&block)
91
- options.each(&block)
92
- end
93
-
94
- private
95
-
96
- def options
97
- @options ||= {}
98
- end
99
- end
100
-
101
- end
102
- end
@@ -1,21 +0,0 @@
1
- module Metacrunch
2
- module ParallelProcessableReader
3
-
4
- def set_parallel_process_options(number_of_processes: 1, process_index: 0)
5
- raise ArgumentError, "number_of_processes must be >= 1" if number_of_processes < 1
6
- raise ArgumentError, "process_index must be >= 0" if process_index < 0
7
-
8
- @number_of_processes = number_of_processes
9
- @process_index = process_index
10
- end
11
-
12
- def number_of_processes
13
- @number_of_processes || 1
14
- end
15
-
16
- def process_index
17
- @process_index || 0
18
- end
19
-
20
- end
21
- end
@@ -1,8 +0,0 @@
1
- require "redis"
2
-
3
- module Metacrunch
4
- class Redis
5
- require_relative "redis/queue_reader"
6
- require_relative "redis/queue_writer"
7
- end
8
- end
@@ -1,43 +0,0 @@
1
- require "metacrunch/redis"
2
-
3
- module Metacrunch
4
- class Redis::QueueReader
5
- include Metacrunch::ParallelProcessableReader
6
-
7
- def initialize(redis_connection_or_url, queue_name, options = {})
8
- @queue_name = queue_name
9
- raise ArgumentError, "queue_name must be a string" unless queue_name.is_a?(String)
10
-
11
- @blocking_mode = options.delete(:blocking) || false
12
- @blocking_timeout = options.delete(:blocking_timeout) || 0
13
-
14
- @redis = if redis_connection_or_url.is_a?(String)
15
- ::Redis.new(url: redis_connection_or_url)
16
- else
17
- redis_connection_or_url
18
- end
19
- end
20
-
21
- def each(&block)
22
- return enum_for(__method__) unless block_given?
23
-
24
- if @blocking_mode
25
- while true
26
- list, result = @redis.blpop(@queue_name, timeout: @blocking_timeout)
27
- if result.present?
28
- yield JSON.parse(result)
29
- else
30
- yield nil
31
- end
32
- end
33
- else
34
- while result = @redis.lpop(@queue_name)
35
- yield JSON.parse(result)
36
- end
37
- end
38
-
39
- self
40
- end
41
-
42
- end
43
- end
@@ -1,39 +0,0 @@
1
- require "metacrunch/redis"
2
-
3
- module Metacrunch
4
- class Redis::QueueWriter
5
-
6
- def initialize(redis_connection_or_url, queue_name, options = {})
7
- @queue_name = queue_name
8
- raise ArgumentError, "queue_name must be a string" unless queue_name.is_a?(String)
9
-
10
- @save_on_close = options.delete(:save_on_close) || true
11
-
12
- @redis = if redis_connection_or_url.is_a?(String)
13
- ::Redis.new(url: redis_connection_or_url)
14
- else
15
- redis_connection_or_url
16
- end
17
- end
18
-
19
- def write(data)
20
- @redis.rpush(@queue_name, data)
21
- rescue RuntimeError => e
22
- if e.message =~ /maxmemory/
23
- puts "Redis has reached maxmemory. Waiting 10 seconds and trying again..."
24
- sleep(10)
25
- retry
26
- else
27
- raise e
28
- end
29
- end
30
-
31
- def close
32
- if @redis
33
- @redis.bgsave if @save_on_close
34
- @redis.close
35
- end
36
- end
37
-
38
- end
39
- end
@@ -1,33 +0,0 @@
1
- require "metacrunch/redis"
2
-
3
- module Metacrunch
4
- class Redis::Writer
5
-
6
- def initialize(redis_connection_or_url, options = {})
7
- @save_on_close = options.delete(:save_on_close) || true
8
-
9
- @key = options.delete(:key) || :key
10
-
11
- @redis = if redis_connection_or_url.is_a?(String)
12
- ::Redis.new(url: redis_connection_or_url)
13
- else
14
- redis_connection_or_url
15
- end
16
- end
17
-
18
- def write(data)
19
- key = data[@key]
20
- raise ArgumentError, "No key found in data. Tried '#{@key}' but didn't found a value." unless key
21
-
22
- @redis.set(key.to_s, data.to_json)
23
- end
24
-
25
- def close
26
- if @redis
27
- @redis.bgsave if @save_on_close
28
- @redis.close
29
- end
30
- end
31
-
32
- end
33
- end