embulk-plugin-input-hbase 0.0.2-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +42 -0
  3. data/lib/embulk/input_hbase.rb +67 -0
  4. metadata +73 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 129327d9138c961f83ee26a91824f0022612bff1
4
+ data.tar.gz: a3312beeaee9a9f5da3a62284b41c7829095ca57
5
+ SHA512:
6
+ metadata.gz: a903a3d04fae3a12e2e77be45be3b2a9dbc4c6cd7daf457c131e430d03b9660fe6b5e3a7d95bdeaaf3cf7d0061b88d40ec29ce397706efa373353a12a0383e29
7
+ data.tar.gz: 4e3a371d64df8228199471d719ebb47bba4e2b13dd11a48a2653c3625b098bfff050ee1b64dfc460e10acd976ce736b2eebde009d4d7529cdc02b8c3bb6e8d6b
data/README.md ADDED
@@ -0,0 +1,42 @@
1
+ # embulk-plugin-input-hbase
2
+
3
+ ## Example
4
+
5
+ HBase table:
6
+
7
+ ```ruby
8
+ hbase(main):029:0> scan 'example:test'
9
+ ROW COLUMN+CELL
10
+ r1 column=foo:dig, timestamp=1422458241976, value=\x00\x00\x00\x00\x00\x00\x00\x01
11
+ r2 column=foo:dig, timestamp=1422458257028, value=\x00\x00\x00\x00\x00\x00\x00\x02
12
+ r2 column=foo:str, timestamp=1422458830978, value=hello
13
+ r3 column=foo:str, timestamp=1422458270762, value=hey
14
+ 3 row(s) in 0.0860 seconds
15
+ ```
16
+
17
+ Embulk config:
18
+
19
+ ```yaml
20
+ in:
21
+ type: hbase
22
+ host: localhost
23
+ table: 'example:test'
24
+ columns:
25
+ - {name: 'foo:dig', type: long}
26
+ - {name: 'foo:str', type: string}
27
+ out:
28
+ type: stdout
29
+ ```
30
+
31
+ Embulk preview:
32
+
33
+ ```bash
34
+ $ java -jar embulk.jar preview -C $(hbase classpath) example.yml
35
+ +--------------+----------------+
36
+ | foo:dig:long | foo:str:string |
37
+ +--------------+----------------+
38
+ | 1 | |
39
+ | 2 | hello |
40
+ | | hey |
41
+ +--------------+----------------+
42
+ ```
data/lib/embulk/input_hbase.rb ADDED
@@ -0,0 +1,67 @@
1
+ require 'java'
2
+
3
+ java_import org.apache.hadoop.hbase.HBaseConfiguration
4
+ java_import org.apache.hadoop.hbase.client.HConnectionManager
5
+ java_import org.apache.hadoop.hbase.client.Scan
6
+ java_import org.apache.hadoop.hbase.util.Bytes
7
+ java_import org.apache.hadoop.hbase.CellUtil
8
+
9
module Embulk
  # Embulk input plugin registered under the type name 'hbase'.
  #
  # It scans a whole HBase table and emits one record per row. Each configured
  # column is named 'family:qualifier'; a row that has no cell for a column
  # yields nil in that position.
  class InputHBase < InputPlugin
    Plugin.register_input('hbase', self)

    # Reads the plugin configuration and hands the task to Embulk.
    #
    # Config keys read here:
    #   host    - value used for 'hbase.zookeeper.quorum' (default 'localhost')
    #   table   - name of the table to scan (no default)
    #   columns - array of entries with 'name' and 'type' keys
    #
    # Yields the task hash, the column list and the thread count (always 1).
    # Returns an empty hash.
    def self.transaction(config, &control)
      task = {
        'host' => config.param('host', :string, default: 'localhost'),
        'table' => config.param('table', :string)
      }
      threads = 1
      columns = config.param('columns', :array).map.with_index do |column, i|
        Column.new(i, column['name'], column['type'].to_sym)
      end
      # The value yielded back by the block was never used, so it is not kept.
      yield(task, columns, threads)
      {}
    end

    def initialize(task, schema, index, page_builder)
      super
    end

    # Scans the configured table and adds one record per row to the page
    # builder, then finishes the page builder.
    #
    # Returns an empty commit report hash.
    def run
      # Family/qualifier byte arrays depend only on the schema, so they are
      # computed once instead of once per column per row.
      lookups = column_lookups
      with_scanner do |scanner|
        scanner.each do |result|
          @page_builder.add(lookups.map { |type, family, qualifier|
            read_value(result, type, family, qualifier)
          })
        end
      end
      @page_builder.finish
      {}
    end

    private

    # Opens connection, table and scanner, yields the scanner, and closes all
    # three in reverse order even when the block raises.
    def with_scanner
      conf = HBaseConfiguration.create
      conf.set('hbase.zookeeper.quorum', @task['host'])
      connection = HConnectionManager.createConnection(conf)
      begin
        table = connection.getTable(@task['table'])
        begin
          scanner = table.getScanner(Scan.new)
          begin
            yield scanner
          ensure
            scanner.close
          end
        ensure
          table.close
        end
      ensure
        connection.close
      end
    end

    # Builds one [type, family_bytes, qualifier_bytes] triple per schema
    # column by splitting the column name on ':'. A name without ':' leaves
    # the qualifier nil.
    def column_lookups
      @schema.map do |column|
        family, qualifier = column.name.split(':').map { |part| Bytes.toBytes(part) }
        [column.type, family, qualifier]
      end
    end

    # Returns the decoded latest cell value for the column, or nil when the
    # row has no such column.
    def read_value(result, type, family, qualifier)
      return nil unless result.containsColumn(family, qualifier)

      raw = CellUtil.cloneValue(result.getColumnLatestCell(family, qualifier))
      return nil unless raw

      decode_value(raw, type)
    end

    # Converts raw cell bytes according to the column type; any type other
    # than :long or :string is passed through as the raw bytes.
    def decode_value(raw, type)
      case type
      when :long then Bytes.toLong(raw)
      when :string then Bytes.toString(raw)
      else raw
      end
    end
  end
end
metadata ADDED
@@ -0,0 +1,73 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: embulk-plugin-input-hbase
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: java
6
+ authors:
7
+ - Shun Takebayashi
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-02-04 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - ~>
17
+ - !ruby/object:Gem::Version
18
+ version: '1.0'
19
+ name: bundler
20
+ prerelease: false
21
+ type: :development
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.0'
27
+ - !ruby/object:Gem::Dependency
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - '>='
31
+ - !ruby/object:Gem::Version
32
+ version: 0.9.2
33
+ name: rake
34
+ prerelease: false
35
+ type: :development
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: 0.9.2
41
+ description: Embulk plugin for HBase input
42
+ email: shun@takebayashi.asia
43
+ executables: []
44
+ extensions: []
45
+ extra_rdoc_files: []
46
+ files:
47
+ - lib/embulk/input_hbase.rb
48
+ - README.md
49
+ homepage: https://github.com/takebayashi/embulk-plugin-input-hbase
50
+ licenses:
51
+ - Apache 2.0
52
+ metadata: {}
53
+ post_install_message:
54
+ rdoc_options: []
55
+ require_paths:
56
+ - lib
57
+ required_ruby_version: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ required_rubygems_version: !ruby/object:Gem::Requirement
63
+ requirements:
64
+ - - '>='
65
+ - !ruby/object:Gem::Version
66
+ version: '0'
67
+ requirements: []
68
+ rubyforge_project:
69
+ rubygems_version: 2.1.9
70
+ signing_key:
71
+ specification_version: 4
72
+ summary: Embulk plugin for HBase input
73
+ test_files: []