embulk-plugin-input-hbase 0.0.2-java

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4)
  1. checksums.yaml +7 -0
  2. data/README.md +42 -0
  3. data/lib/embulk/input_hbase.rb +67 -0
  4. metadata +73 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 129327d9138c961f83ee26a91824f0022612bff1
4
+ data.tar.gz: a3312beeaee9a9f5da3a62284b41c7829095ca57
5
+ SHA512:
6
+ metadata.gz: a903a3d04fae3a12e2e77be45be3b2a9dbc4c6cd7daf457c131e430d03b9660fe6b5e3a7d95bdeaaf3cf7d0061b88d40ec29ce397706efa373353a12a0383e29
7
+ data.tar.gz: 4e3a371d64df8228199471d719ebb47bba4e2b13dd11a48a2653c3625b098bfff050ee1b64dfc460e10acd976ce736b2eebde009d4d7529cdc02b8c3bb6e8d6b
data/README.md ADDED
@@ -0,0 +1,42 @@
1
+ # embulk-plugin-input-hbase
2
+
3
+ ## Example
4
+
5
+ HBase table:
6
+
7
+ ```ruby
8
+ hbase(main):029:0> scan 'example:test'
9
+ ROW COLUMN+CELL
10
+ r1 column=foo:dig, timestamp=1422458241976, value=\x00\x00\x00\x00\x00\x00\x00\x01
11
+ r2 column=foo:dig, timestamp=1422458257028, value=\x00\x00\x00\x00\x00\x00\x00\x02
12
+ r2 column=foo:str, timestamp=1422458830978, value=hello
13
+ r3 column=foo:str, timestamp=1422458270762, value=hey
14
+ 3 row(s) in 0.0860 seconds
15
+ ```
16
+
17
+ Embulk config:
18
+
19
+ ```yaml
20
+ in:
21
+ type: hbase
22
+ host: localhost
23
+ table: 'example:test'
24
+ columns:
25
+ - {name: 'foo:dig', type: long}
26
+ - {name: 'foo:str', type: string}
27
+ out:
28
+ type: stdout
29
+ ```
30
+
31
+ Embulk preview:
32
+
33
+ ```bash
34
+ $ java -jar embulk.jar preview -C $(hbase classpath) example.yml
35
+ +--------------+----------------+
36
+ | foo:dig:long | foo:str:string |
37
+ +--------------+----------------+
38
+ | 1 | |
39
+ | 2 | hello |
40
+ | | hey |
41
+ +--------------+----------------+
42
+ ```
data/lib/embulk/input_hbase.rb ADDED
@@ -0,0 +1,67 @@
1
+ require 'java'
2
+
3
+ java_import org.apache.hadoop.hbase.HBaseConfiguration
4
+ java_import org.apache.hadoop.hbase.client.HConnectionManager
5
+ java_import org.apache.hadoop.hbase.client.Scan
6
+ java_import org.apache.hadoop.hbase.util.Bytes
7
+ java_import org.apache.hadoop.hbase.CellUtil
8
+
9
+ module Embulk
10
+ class InputHBase < InputPlugin
11
+ Plugin.register_input('hbase', self)
12
+
13
+ def self.transaction(config, &control)
14
+ task = {
15
+ 'host' => config.param('host', :string, default: 'localhost'),
16
+ 'table' => config.param('table', :string)
17
+ }
18
+ threads = 1
19
+ columns = config.param('columns', :array).map.with_index { |column, i|
20
+ Column.new(i, column['name'], column['type'].to_sym)
21
+ }
22
+ commit_reports = yield(task, columns, threads)
23
+ return {}
24
+ end
25
+
26
+ def initialize(task, schema, index, page_builder)
27
+ super
28
+ end
29
+
30
+ def run
31
+ conf = HBaseConfiguration.create
32
+ conf.set('hbase.zookeeper.quorum', @task['host'])
33
+ connection = HConnectionManager.createConnection(conf)
34
+ table = connection.getTable(@task['table'])
35
+ scan = Scan.new
36
+ scanner = table.getScanner(scan)
37
+ scanner.each { |result|
38
+ @page_builder.add(@schema.map { |column|
39
+ family, qualifier = column.name.split(':').map {|e|
40
+ Bytes.toBytes(e)
41
+ }
42
+ raw = nil
43
+ if result.containsColumn(family, qualifier) then
44
+ cell = result.getColumnLatestCell(family, qualifier)
45
+ raw = CellUtil.cloneValue(cell)
46
+ end
47
+ if raw then
48
+ case column.type
49
+ when :long
50
+ Bytes.toLong(raw)
51
+ when :string
52
+ Bytes.toString(raw)
53
+ else
54
+ raw
55
+ end
56
+ else
57
+ nil
58
+ end
59
+ })
60
+ }
61
+ @page_builder.finish
62
+ commit_report = {
63
+ }
64
+ return commit_report
65
+ end
66
+ end
67
+ end
metadata ADDED
@@ -0,0 +1,73 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: embulk-plugin-input-hbase
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: java
6
+ authors:
7
+ - Shun Takebayashi
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-02-04 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - ~>
17
+ - !ruby/object:Gem::Version
18
+ version: '1.0'
19
+ name: bundler
20
+ prerelease: false
21
+ type: :development
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.0'
27
+ - !ruby/object:Gem::Dependency
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - '>='
31
+ - !ruby/object:Gem::Version
32
+ version: 0.9.2
33
+ name: rake
34
+ prerelease: false
35
+ type: :development
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: 0.9.2
41
+ description: Embulk plugin for HBase input
42
+ email: shun@takebayashi.asia
43
+ executables: []
44
+ extensions: []
45
+ extra_rdoc_files: []
46
+ files:
47
+ - lib/embulk/input_hbase.rb
48
+ - README.md
49
+ homepage: https://github.com/takebayashi/embulk-plugin-input-hbase
50
+ licenses:
51
+ - Apache 2.0
52
+ metadata: {}
53
+ post_install_message:
54
+ rdoc_options: []
55
+ require_paths:
56
+ - lib
57
+ required_ruby_version: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ required_rubygems_version: !ruby/object:Gem::Requirement
63
+ requirements:
64
+ - - '>='
65
+ - !ruby/object:Gem::Version
66
+ version: '0'
67
+ requirements: []
68
+ rubyforge_project:
69
+ rubygems_version: 2.1.9
70
+ signing_key:
71
+ specification_version: 4
72
+ summary: Embulk plugin for HBase input
73
+ test_files: []