embulk-output-groonga 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 3a982df5a40609112ffe3d24a191986a450ba722
4
+ data.tar.gz: ed668bb7ed6768fe11e322d9eb6385139fcdef65
5
+ SHA512:
6
+ metadata.gz: 3b32c1340d87ff3bd5988e192887b812e7280e2c5c8b8fc390a1d6df1113c9cbda13ee562394f6754bdf2c7a4771a6bab9cf3678f54745ee796e1edd36215946
7
+ data.tar.gz: 97c5ea885c1986ca37a75fc4fbaa47957519d24d332f5dbc25a52ab155f5d0d50a005297de225deb019ae3a3d39fd43d309d6d16ebec6499c0a41c402076cfc5
data/.gitignore ADDED
@@ -0,0 +1,5 @@
1
+ *~
2
+ /pkg/
3
+ /tmp/
4
+ /.bundle/
5
+ /Gemfile.lock
data/CHANGES.md ADDED
@@ -0,0 +1,4 @@
1
+ 0.1.1 (2015-08-10)
2
+ ------------------
3
+ * First release
4
+
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source 'https://rubygems.org/'
2
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+
2
+ MIT License
3
+
4
+ Permission is hereby granted, free of charge, to any person obtaining
5
+ a copy of this software and associated documentation files (the
6
+ "Software"), to deal in the Software without restriction, including
7
+ without limitation the rights to use, copy, modify, merge, publish,
8
+ distribute, sublicense, and/or sell copies of the Software, and to
9
+ permit persons to whom the Software is furnished to do so, subject to
10
+ the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be
13
+ included in all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,54 @@
1
+ # Groonga output plugin for Embulk
2
+
3
+ Embulk output plugin that loads data into Groonga (a full-text search engine).
4
+
5
+ ## Overview
6
+
7
+ * **Plugin type**: output
8
+ * **Load all or nothing**: no
9
+ * **Resume supported**: no
10
+ * **Cleanup supported**: yes
11
+
12
+ ## Configuration
13
+
14
+ - **table**: output table name (string, required)
15
+ - **key_column**: name of the input column whose value is stored as the Groonga `_key` (string, required)
16
+ - **host**: groonga server name (string, required)
17
+ - **port**: groonga port (integer, default: 10041)
18
+ - **protocol**: connection protocol, `gqtp` or `http` (string, default: http)
19
+
20
+
21
+ ## Installation
22
+
23
+ * Install the embulk command itself.
24
+ * install embulk-output-groonga.
25
+
26
+ ### Embulk install
27
+
28
+ install [embulk](https://github.com/embulk/embulk#quick-start)
29
+ For libyajl2 installation, *Embulk 0.6.10 required*.
30
+
31
+ ### install gem from github master.
32
+
33
+ install embulk-output-groonga
34
+
35
+ ```
36
+ git clone https://github.com/hiroyuki-sato/embulk-output-groonga.git
37
+ cd embulk-output-groonga
38
+ rake build
39
+ embulk gem install pkg/embulk-output-groonga-0.1.1.gem
40
+ ```
41
+
42
+ ## Example
43
+
44
+ The following is an example output configuration.
45
+
46
+ ```yaml
47
+ out:
48
+ type: groonga
49
+ table: Site
50
+ key_column: key
51
+ host: localhost
52
+ port: 10041
53
+ protocol: http
54
+ ```
data/Rakefile ADDED
@@ -0,0 +1,3 @@
1
+ require "bundler/gem_tasks"
2
+
3
+ task default: :build
@@ -0,0 +1,20 @@
1
+
2
# Gem specification for the embulk-output-groonga plugin.
Gem::Specification.new do |gem|
  # Identity
  gem.name    = "embulk-output-groonga"
  gem.version = "0.1.1"

  # Human-readable metadata
  gem.summary     = "Groonga output plugin for Embulk"
  gem.description = "Dumps records to Groonga."
  gem.homepage    = "https://github.com/hiroyuki-sato/embulk-output-groonga"
  gem.licenses    = ["MIT"]
  gem.authors     = ["Hiroyuki Sato"]
  gem.email       = ["hiroysato@gmail.com"]

  # Package every file tracked by git, plus any jars found under classpath/.
  tracked_files     = `git ls-files`.split("\n")
  gem.files         = tracked_files + Dir["classpath/*.jar"]
  gem.test_files    = gem.files.grep(%r{^(test|spec)/})
  gem.require_paths = ["lib"]

  # Runtime dependencies
  gem.add_runtime_dependency 'groonga-command-parser', '>= 0.1.4'
  gem.add_runtime_dependency 'groonga-client', '>= 0.1.8'

  # Development-only dependencies
  gem.add_development_dependency 'bundler', '~> 1.0'
  gem.add_development_dependency 'rake', '>= 10.0'
end
@@ -0,0 +1,120 @@
1
+ require 'groonga/client'
2
+
3
module Embulk
  module Output

    # Maximum number of records sent to Groonga in a single load request.
    FLUSH_SIZE = 1_000

    # Embulk output plugin, registered under the name "groonga", that sends
    # each incoming record to a Groonga table through Groonga::Client.
    class GroongaOutputPlugin < OutputPlugin
      Plugin.register_output("groonga", self)

      # Reads the plugin configuration, validates it and runs the output
      # tasks.
      #
      # config  - plugin configuration; "host", "key_column" and "table" are
      #           read without a default, "port" defaults to 10041 and
      #           "protocol" defaults to "http".
      # schema  - not used by this method.
      # count   - not used by this method.
      # control - block that is yielded the task hash.
      #
      # Returns an empty Hash as the next config diff.
      # Raises RuntimeError when "protocol" is neither "gqtp" nor "http".
      def self.transaction(config, schema, count, &control)
        # configuration code:
        task = {
          "host"       => config.param("host", :string),
          "port"       => config.param("port", :integer, default: 10041),
          "protocol"   => config.param("protocol", :string, default: 'http'),
          "key_column" => config.param("key_column", :string),
          "table"      => config.param("table", :string),
          # "create_table" => config.param("create_table",:string)
        }

        prot = task['protocol']
        unless %w[gqtp http].include?(prot)
          raise RuntimeError, "Unknown protocol #{prot}. supported protocol: gqtp, http"
        end

        # Non-resumable output: the value returned by the block is not used.
        # (A resumable variant would call resume(task, schema, count, &control).)
        yield(task)

        # next config diff
        {}
      end

      # Opens the Groonga client connection described by the task and caches
      # the key column and destination table names.
      def init
        @client = Groonga::Client.open(:host     => task["host"],
                                       :port     => task["port"],
                                       :protocol => task["protocol"].to_sym)
        @key_column = task["key_column"]
        @out_table  = task["table"]

        # create_table
      end

      # Closes the Groonga client. Does nothing when no client was opened
      # (for example when init raised before assigning it).
      def close
        @client.close if @client
      end

      # Converts every record of the page to a Hash keyed by column name,
      # moves the value of the key column to "_key", and loads the resulting
      # hashes into the output table in batches of at most FLUSH_SIZE.
      #
      # page - enumerable of records; each record is zipped with schema.names.
      #
      # Raises RuntimeError when a load response does not report the same
      # number of records that were sent.
      def add(page)
        column_names = schema.names
        records = []

        page.each do |record|
          row = Hash[column_names.zip(record)]
          row['_key'] = row.delete(@key_column)
          records << row

          # Flush as soon as a full batch has been collected so that every
          # request carries exactly FLUSH_SIZE records.
          next if records.size < FLUSH_SIZE
          load_records(records)
          records.clear
        end

        # Send whatever is left over from the last, partial batch.
        load_records(records) unless records.empty?
      end

      # Nothing to do when the task finishes.
      def finish
      end

      # Nothing to do when the task is aborted.
      def abort
      end

      # Returns an empty Hash as the commit report.
      def commit
        {}
      end

      private

      # Sends one batch to the output table, logs the result and checks it.
      #
      # records - Array of Hashes to load.
      #
      # Raises RuntimeError when the response body differs from the number
      # of records sent (the body is presumably the count of loaded records
      # -- confirm against groonga-client).
      def load_records(records)
        ret = @client.load(:table => @out_table, :values => records)
        Embulk.logger.info "groonga inserted #{ret.body} / #{records.size}"
        return if ret.body == records.size

        raise RuntimeError, "inserted value does not match"
      end

      # Returns the "name" entry of every item in the client's table list.
      def table_names
        # TODO Error check
        @client.table_list.map { |table| table['name'] }
      end

      # Returns true when a table called +name+ is in the table list.
      def table_exist?(name)
        # TODO Error check
        table_names.include?(name)
      end

      # Executes the command stored under the task's "create_table" entry
      # unless the output table already exists.
      def create_table
        return if table_exist?(@out_table)

        # The "create_table" option is currently commented out in
        # .transaction, so the entry may be absent; skip instead of sending
        # nil to the client.
        command = task['create_table']
        return if command.nil?

        # TODO Error check
        @client.execute(command)
      end

    end

  end
end
metadata ADDED
@@ -0,0 +1,109 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: embulk-output-groonga
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.1
5
+ platform: ruby
6
+ authors:
7
+ - Hiroyuki Sato
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-12-06 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: groonga-command-parser
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 0.1.4
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: 0.1.4
27
+ - !ruby/object:Gem::Dependency
28
+ name: groonga-client
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: 0.1.8
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: 0.1.8
41
+ - !ruby/object:Gem::Dependency
42
+ name: bundler
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '1.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '1.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rake
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '10.0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '10.0'
69
+ description: Dumps records to Groonga.
70
+ email:
71
+ - hiroysato@gmail.com
72
+ executables: []
73
+ extensions: []
74
+ extra_rdoc_files: []
75
+ files:
76
+ - ".gitignore"
77
+ - CHANGES.md
78
+ - Gemfile
79
+ - LICENSE.txt
80
+ - README.md
81
+ - Rakefile
82
+ - embulk-output-groonga.gemspec
83
+ - lib/embulk/output/groonga.rb
84
+ homepage: https://github.com/hiroyuki-sato/embulk-output-groonga
85
+ licenses:
86
+ - MIT
87
+ metadata: {}
88
+ post_install_message:
89
+ rdoc_options: []
90
+ require_paths:
91
+ - lib
92
+ required_ruby_version: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ required_rubygems_version: !ruby/object:Gem::Requirement
98
+ requirements:
99
+ - - ">="
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
102
+ requirements: []
103
+ rubyforge_project:
104
+ rubygems_version: 2.4.5.1
105
+ signing_key:
106
+ specification_version: 4
107
+ summary: Groonga output plugin for Embulk
108
+ test_files: []
109
+ has_rdoc: