humboldt 1.1.1-java → 2.0.0.pre0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/ext/rubydoop.rb +6 -0
- data/lib/humboldt/cli.rb +2 -2
- data/lib/humboldt/prefix_grouping.rb +4 -4
- data/lib/humboldt/version.rb +1 -1
- data/lib/humboldt.jar +0 -0
- metadata +10 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ec5fa36058d75fbd2686ba85089e582a54942d7a
|
4
|
+
data.tar.gz: fa4304eb631f72984a4561bcf611fe5f11c336f4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a6086a49331ce3f0c971d2ad5814e0b708e81e9b7d33d8ec38ed40e538a03029a76e4f63eff4d75ffdaa965ebae33f69e3dac0e0403e6a059d92c761b2cbc366
|
7
|
+
data.tar.gz: 4537396a939263fe5fd93082286e56b90d5787c063cf3c791dcdde22825c1d2d5e4cbdcac36fb0bf7338009e4ea93bb8d3a771ca5efcde1c07158719c79057ef
|
data/lib/ext/rubydoop.rb
CHANGED
@@ -2,21 +2,27 @@
|
|
2
2
|
|
3
3
|
module Rubydoop
|
4
4
|
class JobDefinition
|
5
|
+
# @private
|
5
6
|
alias mapperrr mapper
|
7
|
+
|
6
8
|
def mapper(cls)
|
7
9
|
map_output_key cls.output_key.const_get(:HADOOP) if cls.respond_to?(:output_key)
|
8
10
|
map_output_value cls.output_value.const_get(:HADOOP) if cls.respond_to?(:output_value)
|
9
11
|
mapperrr cls
|
10
12
|
end
|
11
13
|
|
14
|
+
# @private
|
12
15
|
alias reducerrr reducer
|
16
|
+
|
13
17
|
def reducer(cls)
|
14
18
|
output_key cls.output_key.const_get(:HADOOP) if cls.respond_to?(:output_key)
|
15
19
|
output_value cls.output_value.const_get(:HADOOP) if cls.respond_to?(:output_value)
|
16
20
|
reducerrr cls
|
17
21
|
end
|
18
22
|
|
23
|
+
# @private
|
19
24
|
alias inputtt input
|
25
|
+
|
20
26
|
def input(paths, options={})
|
21
27
|
options = options.dup
|
22
28
|
format = options[:format]
|
data/lib/humboldt/cli.rb
CHANGED
@@ -60,7 +60,7 @@ module Humboldt
|
|
60
60
|
end
|
61
61
|
input_glob = File.join(options[:data_path], options[:input])
|
62
62
|
hadoop_config_path = options[:hadoop_config] || default_hadoop_config_path
|
63
|
-
run_command('hadoop', 'jar', project_jar, '-conf', hadoop_config_path,
|
63
|
+
run_command('hadoop', 'jar', project_jar, job_config, '-conf', hadoop_config_path, input_glob, output_path, *options[:extra_hadoop_args])
|
64
64
|
end
|
65
65
|
|
66
66
|
# @deprecated EMR support will be removed in 2.0
|
@@ -189,7 +189,7 @@ module Humboldt
|
|
189
189
|
end
|
190
190
|
|
191
191
|
def check_job!
|
192
|
-
raise Thor::Error, "No such job: #{job_config}" unless File.exists?("
|
192
|
+
raise Thor::Error, "No such job: #{job_config}" unless File.exists?("bin/#{job_config}")
|
193
193
|
end
|
194
194
|
|
195
195
|
def relative_path(path)
|
data/lib/humboldt/prefix_grouping.rb
CHANGED
@@ -4,7 +4,7 @@ require 'zlib'
|
|
4
4
|
|
5
5
|
|
6
6
|
module Humboldt
|
7
|
-
# @deprecated Use {Rubydoop::JobDescription
|
7
|
+
# @deprecated Use {Rubydoop::JobDescription#secondary_sort}
|
8
8
|
class BinaryPrefixPartitioner
|
9
9
|
def initialize(cutoff_index)
|
10
10
|
@cutoff_index = cutoff_index
|
@@ -17,7 +17,7 @@ module Humboldt
|
|
17
17
|
end
|
18
18
|
end
|
19
19
|
|
20
|
-
# @deprecated Use {Rubydoop::JobDescription
|
20
|
+
# @deprecated Use {Rubydoop::JobDescription#secondary_sort}
|
21
21
|
class DropBinaryPrefixPartitioner < BinaryPrefixPartitioner
|
22
22
|
def partition(key, value, num_partitions)
|
23
23
|
length = key.length > @cutoff_index ? key.length - @cutoff_index : 0
|
@@ -26,7 +26,7 @@ module Humboldt
|
|
26
26
|
end
|
27
27
|
end
|
28
28
|
|
29
|
-
# @deprecated Use {Rubydoop::JobDescription
|
29
|
+
# @deprecated Use {Rubydoop::JobDescription#secondary_sort}
|
30
30
|
class BinaryPrefixComparator
|
31
31
|
def initialize(cutoff_index)
|
32
32
|
@cutoff_index = cutoff_index
|
@@ -39,7 +39,7 @@ module Humboldt
|
|
39
39
|
end
|
40
40
|
end
|
41
41
|
|
42
|
-
# @deprecated Use {Rubydoop::JobDescription
|
42
|
+
# @deprecated Use {Rubydoop::JobDescription#secondary_sort}
|
43
43
|
class DropBinaryPrefixComparator < BinaryPrefixComparator
|
44
44
|
def compare_raw(bytes1, start1, length1, bytes2, start2, length2)
|
45
45
|
subset_length1 = length1 - @cutoff_index
|
data/lib/humboldt/version.rb
CHANGED
data/lib/humboldt.jar
CHANGED
Binary file
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: humboldt
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 2.0.0.pre0
|
5
5
|
platform: java
|
6
6
|
authors:
|
7
7
|
- The Burt Platform Team
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-02-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -27,17 +27,17 @@ dependencies:
|
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
requirement: !ruby/object:Gem::Requirement
|
29
29
|
requirements:
|
30
|
-
- -
|
30
|
+
- - '='
|
31
31
|
- !ruby/object:Gem::Version
|
32
|
-
version:
|
32
|
+
version: 2.0.0.pre1
|
33
33
|
name: rubydoop
|
34
34
|
prerelease: false
|
35
35
|
type: :runtime
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- -
|
38
|
+
- - '='
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version:
|
40
|
+
version: 2.0.0.pre1
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
requirement: !ruby/object:Gem::Requirement
|
43
43
|
requirements:
|
@@ -94,13 +94,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
94
94
|
version: '0'
|
95
95
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
96
96
|
requirements:
|
97
|
-
- - '
|
97
|
+
- - '>'
|
98
98
|
- !ruby/object:Gem::Version
|
99
|
-
version:
|
99
|
+
version: 1.3.1
|
100
100
|
requirements: []
|
101
101
|
rubyforge_project:
|
102
|
-
rubygems_version: 2.
|
102
|
+
rubygems_version: 2.4.8
|
103
103
|
signing_key:
|
104
104
|
specification_version: 4
|
105
105
|
summary: Tools and libraries for simplifying running Rubydoop jobs locally and on AWS Elastic MapReduce
|
106
106
|
test_files: []
|
107
|
+
has_rdoc:
|