embulk-filter-pherialize 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: f1b4033c2bc58cb20f6b8167b9ed8171f12e1e5d
4
+ data.tar.gz: 65e6d4a0feb6543863109b84e2e38a58ad71fa54
5
+ SHA512:
6
+ metadata.gz: d10b9bc70b02ff1471e567345689ee06ba26d7a46fe4b96f39c176a86ba7c2028b751a432ba5d0e23ec73808838361e851e94fbeb4bab9f3ad547ff5e975a65a
7
+ data.tar.gz: 67867d5e02ea19c73d1df8639070ae371163bba20fefb30a1184dbde496b27a0668a6eaabf1bf64ccf7d21caa5e7b85ade506a17c671918f77750149903916d1
@@ -0,0 +1,5 @@
1
+ *~
2
+ /pkg/
3
+ /tmp/
4
+ /.bundle/
5
+ /Gemfile.lock
@@ -0,0 +1 @@
1
+ jruby-9.0.4.0
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source 'https://rubygems.org/'
2
+ gemspec
@@ -0,0 +1,21 @@
1
+
2
+ MIT License
3
+
4
+ Permission is hereby granted, free of charge, to any person obtaining
5
+ a copy of this software and associated documentation files (the
6
+ "Software"), to deal in the Software without restriction, including
7
+ without limitation the rights to use, copy, modify, merge, publish,
8
+ distribute, sublicense, and/or sell copies of the Software, and to
9
+ permit persons to whom the Software is furnished to do so, subject to
10
+ the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be
13
+ included in all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,35 @@
1
+ # Pherialize filter plugin for Embulk
2
+
3
+ Deserializes PHP-serialized strings and extracts selected values as new columns.
4
+
5
+ See [keichan34/php-serialize](https://github.com/keichan34/php-serialize).
6
+
7
+ ## Overview
8
+
9
+ * **Plugin type**: filter
10
+
11
+ ## Configuration
12
+
13
+ - **serialized_column**: target serialized column (string, required)
14
+ - **extract_fields**: fields to extract from the deserialized data, each given as `{name: ..., type: ...}` (array, default: [])
15
+ - **drop_serialized_column**: drop the serialized column from the output schema (boolean, default: false)
16
+
17
+ ## Example
18
+
19
+ ```yaml
20
+ filters:
21
+ - type: pherialize
22
+ serialized_column: serialized_data
23
+ drop_serialized_column: true
24
+ extract_fields:
25
+ - {name: id, type: long}
26
+ - {name: name, type: string}
27
+ out:
28
+ type: stdout
29
+ ```
30
+
31
+ ## Build
32
+
33
+ ```
34
+ $ rake
35
+ ```
@@ -0,0 +1,3 @@
1
+ require "bundler/gem_tasks"
2
+
3
+ task default: :build
@@ -0,0 +1,22 @@
1
+
2
# Gem specification for the embulk-filter-pherialize plugin.
Gem::Specification.new do |gem|
  # Identity and descriptive metadata.
  gem.name        = 'embulk-filter-pherialize'
  gem.version     = '0.0.1'
  gem.summary     = 'Pherialize filter plugin for Embulk'
  gem.description = 'Embulk plugin that deserialize PHP serialized strings to extract values as new column'
  gem.authors     = ['cynipe']
  gem.email       = ['cynipe@gmail.com']
  gem.homepage    = 'https://github.com/cynipe/embulk-filter-pherialize'
  gem.licenses    = ['MIT']

  # Packaged files: everything tracked by git plus any jars under classpath/.
  tracked_files = `git ls-files`.split("\n")
  bundled_jars  = Dir['classpath/*.jar']
  gem.files         = tracked_files + bundled_jars
  gem.test_files    = gem.files.grep(%r{^(test|spec)/})
  gem.require_paths = ['lib']

  # Runtime dependency that provides the PHP (un)serializer.
  gem.add_dependency 'k-php-serialize', ['~> 1.2.1']

  # Development-only dependencies.
  gem.add_development_dependency 'embulk', ['>= 0.8.8']
  gem.add_development_dependency 'bundler', ['>= 1.10.6']
  gem.add_development_dependency 'rake', ['>= 10.0']
  gem.add_development_dependency 'pry'
end
@@ -0,0 +1,56 @@
1
+ require 'php_serialize'
2
+
3
module Embulk
  module Filter
    # Embulk filter that deserializes a PHP-serialized string column and
    # appends selected keys of the deserialized data as new output columns.
    #
    # Configuration keys (read in .transaction):
    #   serialized_column      - name of the column holding the serialized string (required)
    #   extract_fields         - array of entries with 'name' and 'type' keys (default: [])
    #   drop_serialized_column - omit the serialized column from the output (default: false)
    class Pherialize < FilterPlugin
      Plugin.register_filter('pherialize', self)

      # Builds the task hash and the output schema, then yields both.
      #
      # The output schema is the input schema (minus the serialized column
      # when drop_serialized_column is set), re-indexed from 0, followed by
      # one column per extract_fields entry. An extracted field whose name
      # already exists in the input schema is emitted as "_<name>".
      #
      # Raises ConfigError when serialized_column is not in the input schema,
      # so the misconfiguration is reported here rather than as a nil
      # dereference while processing pages.
      def self.transaction(config, in_schema, &control)
        task = {
          'serialized_column' => config.param('serialized_column', :string),
          'extract_fields' => config.param('extract_fields', :array, default: []),
          'drop_serialized_column' => config.param('drop_serialized_column', :bool, default: false),
        }

        # Computed once; used for validation and for collision detection below.
        in_names = in_schema.names
        unless in_names.include?(task['serialized_column'])
          raise ConfigError,
                "serialized_column '#{task['serialized_column']}' does not exist in the input schema"
        end

        kept = in_schema.sort_by(&:index)
        kept = kept.reject { |col| col.name == task['serialized_column'] } if task['drop_serialized_column']
        # Re-index copies so the caller's in_schema columns are left untouched.
        out_schema = kept.each_with_index.map do |col, position|
          col.dup.tap { |copy| copy.index = position }
        end

        offset = out_schema.size
        extracted = task['extract_fields'].each_with_index.map do |field, i|
          name = in_names.include?(field['name']) ? "_#{field['name']}" : field['name']
          Column.new(offset + i, name, field['type'].to_sym)
        end

        yield(task, out_schema + extracted)
      end

      # Caches the task settings used while processing pages.
      def init
        @serialized_column = task['serialized_column']
        @extract_fields = task['extract_fields']
        @drop_serialized_column = task['drop_serialized_column']
      end

      # Nothing to release.
      def close
      end

      # Deserializes the target column of every record in the page and passes
      # the record, with the extracted values appended, to the page builder.
      def add(page)
        target_index = page.schema.find { |col| col.name == @serialized_column }.index
        page.each do |record|
          serialized = record[target_index]
          record.delete_at(target_index) if @drop_serialized_column
          page_builder.add(record + extract(serialized))
        end
      end

      # Flushes the page builder.
      def finish
        page_builder.finish
      end

      private

      # Returns one value per configured extract field, looked up by the
      # field's 'name' in the deserialized data.
      def extract(serialized)
        # A nil value (presumably a NULL column) has nothing to deserialize;
        # emit nil for every extracted field instead of failing in the parser.
        return Array.new(@extract_fields.size) if serialized.nil?

        data = PHP.unserialize(serialized)
        @extract_fields.map { |field| data[field['name']] }
      end
    end
  end
end
metadata ADDED
@@ -0,0 +1,122 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: embulk-filter-pherialize
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - cynipe
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-04-11 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - "~>"
17
+ - !ruby/object:Gem::Version
18
+ version: 1.2.1
19
+ name: k-php-serialize
20
+ prerelease: false
21
+ type: :runtime
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: 1.2.1
27
+ - !ruby/object:Gem::Dependency
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: 0.8.8
33
+ name: embulk
34
+ prerelease: false
35
+ type: :development
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: 0.8.8
41
+ - !ruby/object:Gem::Dependency
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: 1.10.6
47
+ name: bundler
48
+ prerelease: false
49
+ type: :development
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: 1.10.6
55
+ - !ruby/object:Gem::Dependency
56
+ requirement: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: '10.0'
61
+ name: rake
62
+ prerelease: false
63
+ type: :development
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '10.0'
69
+ - !ruby/object:Gem::Dependency
70
+ requirement: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ version: '0'
75
+ name: pry
76
+ prerelease: false
77
+ type: :development
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ description: Embulk plugin that deserialize PHP serialized strings to extract values as new column
84
+ email:
85
+ - cynipe@gmail.com
86
+ executables: []
87
+ extensions: []
88
+ extra_rdoc_files: []
89
+ files:
90
+ - ".gitignore"
91
+ - ".ruby-version"
92
+ - Gemfile
93
+ - LICENSE.txt
94
+ - README.md
95
+ - Rakefile
96
+ - embulk-filter-pherialize.gemspec
97
+ - lib/embulk/filter/pherialize.rb
98
+ homepage: https://github.com/cynipe/embulk-filter-pherialize
99
+ licenses:
100
+ - MIT
101
+ metadata: {}
102
+ post_install_message:
103
+ rdoc_options: []
104
+ require_paths:
105
+ - lib
106
+ required_ruby_version: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ required_rubygems_version: !ruby/object:Gem::Requirement
112
+ requirements:
113
+ - - ">="
114
+ - !ruby/object:Gem::Version
115
+ version: '0'
116
+ requirements: []
117
+ rubyforge_project:
118
+ rubygems_version: 2.4.8
119
+ signing_key:
120
+ specification_version: 4
121
+ summary: Pherialize filter plugin for Embulk
122
+ test_files: []