embulk-filter-query_string_ruby 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 591ce72de6e39b9f5f79783bd0a44cb51c6e39eb
4
+ data.tar.gz: a176d29d7f286ddc380b27e9daa576a86fdb8792
5
+ SHA512:
6
+ metadata.gz: 19063b65528f5753907de808958a3179ed07f864b9104736b39a410b64c0c7f7ec940e89bfbf6819f8597ecedd96954ff2063eef89a42892c8c5bc9205facf23
7
+ data.tar.gz: 3a8c26a48f962ff0833900152e02bba91dc047fc4f6adc201361492c383fa03b23f58fffa357a6583bf317b562b105bdfae791a8476f330da254a42afb7e06f7
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org/'
2
+ gemspec
3
+
4
+ gem 'addressable', require: 'addressable/uri'
data/Gemfile.lock ADDED
@@ -0,0 +1,37 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ embulk-filter-query_string_ruby (0.1.0)
5
+
6
+ GEM
7
+ remote: https://rubygems.org/
8
+ specs:
9
+ addressable (2.5.0)
10
+ public_suffix (~> 2.0, >= 2.0.2)
11
+ embulk (0.8.16)
12
+ jruby-jars (= 9.1.5.0)
13
+ embulk (0.8.16-java)
14
+ bundler (>= 1.10.6)
15
+ liquid (~> 3.0.6)
16
+ msgpack (~> 0.7.3)
17
+ rjack-icu (~> 4.54.1.1)
18
+ jruby-jars (9.1.5.0)
19
+ liquid (3.0.6)
20
+ msgpack (0.7.6-java)
21
+ public_suffix (2.0.5)
22
+ rake (12.0.0)
23
+ rjack-icu (4.54.1.1-java)
24
+
25
+ PLATFORMS
26
+ java
27
+ ruby
28
+
29
+ DEPENDENCIES
30
+ addressable
31
+ bundler (>= 1.10.6)
32
+ embulk (>= 0.8.14)
33
+ embulk-filter-query_string_ruby!
34
+ rake (>= 10.0)
35
+
36
+ BUNDLED WITH
37
+ 1.14.3
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+
2
+ MIT License
3
+
4
+ Permission is hereby granted, free of charge, to any person obtaining
5
+ a copy of this software and associated documentation files (the
6
+ "Software"), to deal in the Software without restriction, including
7
+ without limitation the rights to use, copy, modify, merge, publish,
8
+ distribute, sublicense, and/or sell copies of the Software, and to
9
+ permit persons to whom the Software is furnished to do so, subject to
10
+ the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be
13
+ included in all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,54 @@
1
+ # Query String Ruby filter plugin for Embulk
2
+
3
+ Parses a URL or bare query string held in one input column and appends the selected query parameters to each record as new typed columns.
4
+
5
+ ## Overview
6
+
7
+ * **Plugin type**: filter
8
+
9
+ ## Configuration
10
+
11
+ - **column**: name of the input column that holds the URL or query string to parse (string, required)
12
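+ - **schema**: query parameters to extract, each given as `{name, type, format}` and appended as an output column; `format` is the `strptime` pattern used for `timestamp` columns (array, default: `[]`)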
13
+
14
+ ## Example
15
+
16
+ sample data
17
+ ```
18
+ id,account,time,purchase,comment,query
19
+ 1,32864,2015-01-27 19:23:49,20150127,embulk,http://hoge.com?hoge=aa&fuga=1
20
+ 2,14824,2015-01-27 19:01:23,20150127,embulk jruby,?hoge=aa&fuga=1
21
+ 3,27559,2015-01-28 02:20:02,20150128,"Embulk ""csv"" parser plugin",hoge=aa&fuga=1&piyo=2017-10-01
22
+ 4,11270,2015-01-29 11:54:36,20150129,NULL,hoge=aafuga=1
23
+
24
+ ```
25
+
26
+ configuration
27
+ ```yaml
28
+ filters:
29
+ - type: query_string_ruby
30
+ column: query
31
+ schema:
32
+ - {name: hoge, type: string}
33
+ - {name: fuga, type: long}
34
+ - {name: piyo, type: timestamp, format: '%Y-%m-%d'}
35
+ ```
36
+
37
+ result
38
+ ```
39
+ +---------+--------------+-------------------------+-------------------------+----------------------------+--------------------------------+-------------+-----------+-------------------------+
40
+ | id:long | account:long | time:timestamp | purchase:timestamp | comment:string | query:string | hoge:string | fuga:long | piyo:timestamp |
41
+ +---------+--------------+-------------------------+-------------------------+----------------------------+--------------------------------+-------------+-----------+-------------------------+
42
+ | 1 | 32,864 | 2015-01-27 19:23:49 UTC | 2015-01-27 00:00:00 UTC | embulk | http://hoge.com?hoge=aa&fuga=1 | aa | 1 | |
43
+ | 2 | 14,824 | 2015-01-27 19:01:23 UTC | 2015-01-27 00:00:00 UTC | embulk jruby | ?hoge=aa&fuga=1 | aa | 1 | |
44
+ | 3 | 27,559 | 2015-01-28 02:20:02 UTC | 2015-01-28 00:00:00 UTC | Embulk "csv" parser plugin | hoge=aa&fuga=1&piyo=2017-10-01 | aa | 1 | 2017-09-30 15:00:00 UTC |
45
+ | 4 | 11,270 | 2015-01-29 11:54:36 UTC | 2015-01-29 00:00:00 UTC | | hoge=aafuga=1 | aafuga=1 | | |
46
+ +---------+--------------+-------------------------+-------------------------+----------------------------+--------------------------------+-------------+-----------+-------------------------+
47
+ ```
48
+
49
+
50
+ ## Build
51
+
52
+ ```
53
+ $ rake
54
+ ```
data/Rakefile ADDED
@@ -0,0 +1,3 @@
1
+ require "bundler/gem_tasks"
2
+
3
+ task default: :build
@@ -0,0 +1,20 @@
1
+
2
# Gem specification for the Embulk query-string filter plugin.
Gem::Specification.new do |spec|
  spec.name          = "embulk-filter-query_string_ruby"
  spec.version       = "0.1.0"
  spec.authors       = ["Yuma Murata"]
  spec.summary       = "Query String Ruby filter plugin for Embulk"
  spec.description   = "Query String Ruby"
  spec.email         = ["murata@ebisol.co.jp"]
  spec.licenses      = ["MIT"]
  spec.homepage      = "https://github.com/murata/embulk-filter-query_string_ruby"

  # Package everything tracked by git, plus any jars under classpath/.
  spec.files         = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
  spec.test_files    = spec.files.grep(%r{^(test|spec)/})
  spec.require_paths = ["lib"]

  # Runtime dependency: lib/embulk/filter/query_string_ruby.rb requires
  # "addressable/uri", so it has to be installed together with the plugin.
  # Listing it only in the Gemfile does not reach users who install the gem.
  spec.add_dependency 'addressable', ['~> 2.5']
  spec.add_development_dependency 'embulk', ['>= 0.8.14']
  spec.add_development_dependency 'bundler', ['>= 1.10.6']
  spec.add_development_dependency 'rake', ['>= 10.0']
end
Binary file
data/example/seed.yml ADDED
@@ -0,0 +1,14 @@
1
+ in:
2
+ type: file
3
+ path_prefix: "./example/csv/sample_"
4
+
5
+ filters:
6
+ - type: query_string_ruby
7
+ column: query
8
+ schema:
9
+ - {name: hoge, type: string}
10
+ - {name: fuga, type: long}
11
+ - {name: piyo, type: timestamp, format: '%Y-%m-%d'}
12
+
13
+ out:
14
+ type: stdout
@@ -0,0 +1,72 @@
1
require "addressable/uri"
require "time" # Time.strptime is provided by the "time" standard library

module Embulk
  module Filter

    # Embulk filter that parses a URL or bare query string found in one
    # input column and appends the configured query parameters to every
    # record as additional typed columns.
    #
    # Configuration keys read by this plugin:
    #   column - name of the input column holding the URL / query string
    #   schema - array of {name, type, format} hashes, one per query
    #            parameter to extract (defaults to []); "format" is the
    #            strptime pattern used when type is "timestamp"
    class QueryStringRuby < FilterPlugin
      Plugin.register_filter("query_string_ruby", self)

      # Builds the task and the output schema, then hands both to Embulk.
      #
      # The output schema is the input schema followed by one column per
      # entry of the "schema" option.
      #
      # Raises ConfigError when the configured column does not exist in
      # the input schema, instead of failing later on the first record.
      def self.transaction(config, in_schema, &control)
        column_name = config.param("column", :string)
        target_column = in_schema.find { |c| c.name == column_name }
        unless target_column
          raise ConfigError.new("Column '#{column_name}' is not found in the input schema")
        end

        task = {
          "target_column" => target_column,
          "schema" => config.param("schema", :array, :default => [])
        }

        # Index is passed as nil for the appended columns.
        added_columns = task["schema"].map do |col|
          Column.new(nil, col["name"], col["type"].to_sym, col["format"])
        end
        out_columns = in_schema + added_columns
        yield(task, out_columns)
      end

      # Caches the task values used for every record.
      def init
        @schema = task["schema"]
        # Read back with string keys (see #add) -- presumably Embulk hands the
        # task over as plain hashes; TODO confirm.
        @target_column = task["target_column"]
      end

      def close
      end

      # Appends the extracted query parameters to each record of the page.
      def add(page)
        page.each do |record|
          q = query_parser(record[@target_column["index"]])
          add_record = make_record(@schema, q)
          page_builder.add(record + add_record)
        end
      end

      def finish
        page_builder.finish
      end

      private

      # Returns the query parameters of +query_string+ as a Hash.
      #
      # Accepts either a URL that contains a query part or a bare query
      # string (with or without a leading "?"). A nil value yields an
      # empty Hash.
      def query_parser(query_string)
        # Addressable::URI.parse(nil) returns nil, which would make the
        # `.query` call below raise NoMethodError for NULL column values.
        return {} if query_string.nil?

        u = Addressable::URI.parse(query_string)
        # No query part found: treat the whole value as a bare query string.
        uri = u.query ? u : Addressable::URI.parse("?#{query_string}")
        uri.query_values(Hash) || {}
      end

      # Converts the parsed +query+ Hash into an Array of values, one per
      # +schema+ entry and in schema order. Parameters missing from the
      # query produce nil.
      #
      # Raises ConfigError when a value cannot be converted to the
      # configured type.
      def make_record(schema, query)
        schema.map do |col|
          v = query[col["name"]]
          if v
            begin
              case col["type"]
              when "long"
                # String#to_i does not raise on non-numeric text (it yields 0),
                # so such input is not reported as a cast failure.
                v.to_i
              when "double"
                v.to_f
              when "timestamp"
                Time.strptime(v, col["format"])
              else
                v.to_s
              end
            rescue
              raise ConfigError.new("Cast failed '#{v}' as '#{col["type"]}' (query name is '#{col["name"]}')")
            end
          end
        end
      end

    end

  end
end
metadata ADDED
@@ -0,0 +1,95 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: embulk-filter-query_string_ruby
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Yuma Murata
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2017-02-02 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: embulk
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 0.8.14
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: 0.8.14
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: 1.10.6
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: 1.10.6
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '10.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '10.0'
55
+ description: Query String Ruby
56
+ email:
57
+ - murata@ebisol.co.jp
58
+ executables: []
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - Gemfile
63
+ - Gemfile.lock
64
+ - LICENSE.txt
65
+ - README.md
66
+ - Rakefile
67
+ - embulk-filter-query_string_ruby.gemspec
68
+ - example/csv/sample_01.csv.gz
69
+ - example/seed.yml
70
+ - lib/embulk/filter/query_string_ruby.rb
71
+ homepage: https://github.com/murata/embulk-filter-query_string_ruby
72
+ licenses:
73
+ - MIT
74
+ metadata: {}
75
+ post_install_message:
76
+ rdoc_options: []
77
+ require_paths:
78
+ - lib
79
+ required_ruby_version: !ruby/object:Gem::Requirement
80
+ requirements:
81
+ - - ">="
82
+ - !ruby/object:Gem::Version
83
+ version: '0'
84
+ required_rubygems_version: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - ">="
87
+ - !ruby/object:Gem::Version
88
+ version: '0'
89
+ requirements: []
90
+ rubyforge_project:
91
+ rubygems_version: 2.5.1
92
+ signing_key:
93
+ specification_version: 4
94
+ summary: Query String Ruby filter plugin for Embulk
95
+ test_files: []