embulk-filter-query_string_ruby 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 591ce72de6e39b9f5f79783bd0a44cb51c6e39eb
4
+ data.tar.gz: a176d29d7f286ddc380b27e9daa576a86fdb8792
5
+ SHA512:
6
+ metadata.gz: 19063b65528f5753907de808958a3179ed07f864b9104736b39a410b64c0c7f7ec940e89bfbf6819f8597ecedd96954ff2063eef89a42892c8c5bc9205facf23
7
+ data.tar.gz: 3a8c26a48f962ff0833900152e02bba91dc047fc4f6adc201361492c383fa03b23f58fffa357a6583bf317b562b105bdfae791a8476f330da254a42afb7e06f7
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org/'
2
+ gemspec
3
+
4
+ gem 'addressable', require: 'addressable/uri'
data/Gemfile.lock ADDED
@@ -0,0 +1,37 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ embulk-filter-query_string_ruby (0.1.0)
5
+
6
+ GEM
7
+ remote: https://rubygems.org/
8
+ specs:
9
+ addressable (2.5.0)
10
+ public_suffix (~> 2.0, >= 2.0.2)
11
+ embulk (0.8.16)
12
+ jruby-jars (= 9.1.5.0)
13
+ embulk (0.8.16-java)
14
+ bundler (>= 1.10.6)
15
+ liquid (~> 3.0.6)
16
+ msgpack (~> 0.7.3)
17
+ rjack-icu (~> 4.54.1.1)
18
+ jruby-jars (9.1.5.0)
19
+ liquid (3.0.6)
20
+ msgpack (0.7.6-java)
21
+ public_suffix (2.0.5)
22
+ rake (12.0.0)
23
+ rjack-icu (4.54.1.1-java)
24
+
25
+ PLATFORMS
26
+ java
27
+ ruby
28
+
29
+ DEPENDENCIES
30
+ addressable
31
+ bundler (>= 1.10.6)
32
+ embulk (>= 0.8.14)
33
+ embulk-filter-query_string_ruby!
34
+ rake (>= 10.0)
35
+
36
+ BUNDLED WITH
37
+ 1.14.3
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+
2
+ MIT License
3
+
4
+ Permission is hereby granted, free of charge, to any person obtaining
5
+ a copy of this software and associated documentation files (the
6
+ "Software"), to deal in the Software without restriction, including
7
+ without limitation the rights to use, copy, modify, merge, publish,
8
+ distribute, sublicense, and/or sell copies of the Software, and to
9
+ permit persons to whom the Software is furnished to do so, subject to
10
+ the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be
13
+ included in all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,54 @@
1
+ # Query String Ruby filter plugin for Embulk
2
+
3
+ Parses a query string (or the query part of a URL) stored in one column and appends the configured query parameters as new, typed columns.
4
+
5
+ ## Overview
6
+
7
+ * **Plugin type**: filter
8
+
9
+ ## Configuration
10
+
11
+ - **column**: name of the input column that holds the URL or query string to parse (string, required)
12
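+ - **schema**: query parameters to extract as new columns; each entry is `{name, type, format}`, where `type` is `string`, `long`, `double` or `timestamp` and `format` is the strptime format used for `timestamp` (array, optional, default: `[]`)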
13
+
14
+ ## Example
15
+
16
+ sample data
17
+ ```
18
+ id,account,time,purchase,comment,query
19
+ 1,32864,2015-01-27 19:23:49,20150127,embulk,http://hoge.com?hoge=aa&fuga=1
20
+ 2,14824,2015-01-27 19:01:23,20150127,embulk jruby,?hoge=aa&fuga=1
21
+ 3,27559,2015-01-28 02:20:02,20150128,"Embulk ""csv"" parser plugin",hoge=aa&fuga=1&piyo=2017-10-01
22
+ 4,11270,2015-01-29 11:54:36,20150129,NULL,hoge=aafuga=1
23
+
24
+ ```
25
+
26
+ configuration
27
+ ```yaml
28
+ filters:
29
+ - type: query_string_ruby
30
+ column: query
31
+ schema:
32
+ - {name: hoge, type: string}
33
+ - {name: fuga, type: long}
34
+ - {name: piyo, type: timestamp, format: '%Y-%m-%d'}
35
+ ```
36
+
37
+ result
38
+ ```
39
+ +---------+--------------+-------------------------+-------------------------+----------------------------+--------------------------------+-------------+-----------+-------------------------+
40
+ | id:long | account:long | time:timestamp | purchase:timestamp | comment:string | query:string | hoge:string | fuga:long | piyo:timestamp |
41
+ +---------+--------------+-------------------------+-------------------------+----------------------------+--------------------------------+-------------+-----------+-------------------------+
42
+ | 1 | 32,864 | 2015-01-27 19:23:49 UTC | 2015-01-27 00:00:00 UTC | embulk | http://hoge.com?hoge=aa&fuga=1 | aa | 1 | |
43
+ | 2 | 14,824 | 2015-01-27 19:01:23 UTC | 2015-01-27 00:00:00 UTC | embulk jruby | ?hoge=aa&fuga=1 | aa | 1 | |
44
+ | 3 | 27,559 | 2015-01-28 02:20:02 UTC | 2015-01-28 00:00:00 UTC | Embulk "csv" parser plugin | hoge=aa&fuga=1&piyo=2017-10-01 | aa | 1 | 2017-09-30 15:00:00 UTC |
45
+ | 4 | 11,270 | 2015-01-29 11:54:36 UTC | 2015-01-29 00:00:00 UTC | | hoge=aafuga=1 | aafuga=1 | | |
46
+ +---------+--------------+-------------------------+-------------------------+----------------------------+--------------------------------+-------------+-----------+-------------------------+
47
+ ```
48
+
49
+
50
+ ## Build
51
+
52
+ ```
53
+ $ rake
54
+ ```
data/Rakefile ADDED
@@ -0,0 +1,3 @@
1
+ require "bundler/gem_tasks"
2
+
3
+ task default: :build
@@ -0,0 +1,20 @@
1
+
2
# Gem specification for the query_string_ruby Embulk filter plugin.
Gem::Specification.new do |spec|
  spec.name          = "embulk-filter-query_string_ruby"
  spec.version       = "0.1.0"
  spec.authors       = ["Yuma Murata"]
  spec.summary       = "Query String Ruby filter plugin for Embulk"
  spec.description   = "Query String Ruby"
  spec.email         = ["murata@ebisol.co.jp"]
  spec.licenses      = ["MIT"]
  spec.homepage      = "https://github.com/murata/embulk-filter-query_string_ruby"

  # Package everything tracked by git, plus any jars under classpath/.
  spec.files         = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
  spec.test_files    = spec.files.grep(%r{^(test|spec)/})
  spec.require_paths = ["lib"]

  # The plugin source requires "addressable/uri" when it is loaded, so
  # addressable has to be a runtime dependency of the gem; listing it only in
  # the Gemfile does not install it for users of the packaged gem.
  spec.add_dependency 'addressable', ['~> 2.5']

  spec.add_development_dependency 'embulk', ['>= 0.8.14']
  spec.add_development_dependency 'bundler', ['>= 1.10.6']
  spec.add_development_dependency 'rake', ['>= 10.0']
end
Binary file
data/example/seed.yml ADDED
@@ -0,0 +1,14 @@
1
+ in:
2
+ type: file
3
+ path_prefix: "./example/csv/sample_"
4
+
5
+ filters:
6
+ - type: query_string_ruby
7
+ column: query
8
+ schema:
9
+ - {name: hoge, type: string}
10
+ - {name: fuga, type: long}
11
+ - {name: piyo, type: timestamp, format: '%Y-%m-%d'}
12
+
13
+ out:
14
+ type: stdout
@@ -0,0 +1,72 @@
1
require "time"

require "addressable/uri"
2
+
3
module Embulk
  module Filter

    # Embulk filter plugin that parses the query string found in one input
    # column and appends the configured query parameters as extra columns.
    #
    # Configuration keys read by this plugin:
    #   column - name of the input column holding a URL or a bare query string
    #   schema - array of hashes with "name", "type" and (for timestamps)
    #            "format"; one output column is appended per entry
    class QueryStringRuby < FilterPlugin
      Plugin.register_filter("query_string_ruby", self)

      # Builds the task and the output schema (input columns followed by one
      # column per "schema" entry) and yields both to the caller.
      #
      # Raises ConfigError when the configured column does not exist in the
      # input schema, instead of failing later on the first record.
      def self.transaction(config, in_schema, &control)
        column_name = config.param("column", :string)
        target_column = in_schema.find { |c| c.name == column_name }
        unless target_column
          raise ConfigError.new("Column '#{column_name}' is not found in the input schema")
        end

        task = {
          "target_column" => target_column,
          "schema" => config.param("schema", :array, :default => [])
        }

        added_columns = task["schema"].map do |col|
          Column.new(nil, col["name"], col["type"].to_sym, col["format"])
        end
        yield(task, in_schema + added_columns)
      end

      # Caches the task entries used for every record.
      def init
        @schema = task["schema"]
        @target_column = task["target_column"]
      end

      def close
      end

      # Appends the parsed query parameters to every record of the page.
      def add(page)
        page.each do |record|
          q = query_parser(record[@target_column["index"]])
          add_record = make_record(@schema, q)
          page_builder.add(record + add_record)
        end
      end

      def finish
        page_builder.finish
      end

      private

      # Returns the query parameters of +query_string+ as a Hash.
      #
      # Accepts a URL carrying a query ("http://host?a=1"), a query with a
      # leading "?" or a bare "a=1&b=2" string. A nil or empty value yields an
      # empty Hash so that NULL cells produce nil columns instead of a crash.
      def query_parser(query_string)
        return {} if query_string.to_s.empty?

        u = Addressable::URI.parse(query_string)
        # Without a "?" the whole value is parsed as a path, so retry with the
        # value treated as the query part.
        uri = u.query ? u : Addressable::URI.parse("?#{query_string}")
        uri.query_values(Hash) || {}
      end

      # Builds the values for the appended columns, in schema order.
      #
      # Each value is cast according to the entry's "type"; a parameter that is
      # absent (or has no value) becomes nil. Note that to_i / to_f do not
      # raise on non-numeric text, they return 0 / 0.0.
      #
      # Raises ConfigError when a cast raises (e.g. a timestamp that does not
      # match the configured format).
      def make_record(schema, query)
        schema.map do |col|
          v = query[col["name"]]
          next nil unless v

          begin
            case col["type"]
            when "long"
              v.to_i
            when "double"
              v.to_f
            when "timestamp"
              Time.strptime(v, col["format"])
            else
              v.to_s
            end
          rescue
            raise ConfigError.new("Cast failed '#{v}' as '#{col["type"]}' (query name is '#{col["name"]}')")
          end
        end
      end

    end

  end
end
metadata ADDED
@@ -0,0 +1,95 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: embulk-filter-query_string_ruby
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Yuma Murata
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2017-02-02 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: embulk
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 0.8.14
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: 0.8.14
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: 1.10.6
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: 1.10.6
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '10.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '10.0'
55
+ description: Query String Ruby
56
+ email:
57
+ - murata@ebisol.co.jp
58
+ executables: []
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - Gemfile
63
+ - Gemfile.lock
64
+ - LICENSE.txt
65
+ - README.md
66
+ - Rakefile
67
+ - embulk-filter-query_string_ruby.gemspec
68
+ - example/csv/sample_01.csv.gz
69
+ - example/seed.yml
70
+ - lib/embulk/filter/query_string_ruby.rb
71
+ homepage: https://github.com/murata/embulk-filter-query_string_ruby
72
+ licenses:
73
+ - MIT
74
+ metadata: {}
75
+ post_install_message:
76
+ rdoc_options: []
77
+ require_paths:
78
+ - lib
79
+ required_ruby_version: !ruby/object:Gem::Requirement
80
+ requirements:
81
+ - - ">="
82
+ - !ruby/object:Gem::Version
83
+ version: '0'
84
+ required_rubygems_version: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - ">="
87
+ - !ruby/object:Gem::Version
88
+ version: '0'
89
+ requirements: []
90
+ rubyforge_project:
91
+ rubygems_version: 2.5.1
92
+ signing_key:
93
+ specification_version: 4
94
+ summary: Query String Ruby filter plugin for Embulk
95
+ test_files: []