embulk-filter-pherialize 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: f1b4033c2bc58cb20f6b8167b9ed8171f12e1e5d
4
+ data.tar.gz: 65e6d4a0feb6543863109b84e2e38a58ad71fa54
5
+ SHA512:
6
+ metadata.gz: d10b9bc70b02ff1471e567345689ee06ba26d7a46fe4b96f39c176a86ba7c2028b751a432ba5d0e23ec73808838361e851e94fbeb4bab9f3ad547ff5e975a65a
7
+ data.tar.gz: 67867d5e02ea19c73d1df8639070ae371163bba20fefb30a1184dbde496b27a0668a6eaabf1bf64ccf7d21caa5e7b85ade506a17c671918f77750149903916d1
@@ -0,0 +1,5 @@
1
+ *~
2
+ /pkg/
3
+ /tmp/
4
+ /.bundle/
5
+ /Gemfile.lock
@@ -0,0 +1 @@
1
+ jruby-9.0.4.0
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source 'https://rubygems.org/'
2
+ gemspec
@@ -0,0 +1,21 @@
1
+
2
+ MIT License
3
+
4
+ Permission is hereby granted, free of charge, to any person obtaining
5
+ a copy of this software and associated documentation files (the
6
+ "Software"), to deal in the Software without restriction, including
7
+ without limitation the rights to use, copy, modify, merge, publish,
8
+ distribute, sublicense, and/or sell copies of the Software, and to
9
+ permit persons to whom the Software is furnished to do so, subject to
10
+ the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be
13
+ included in all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,35 @@
1
+ # Pherialize filter plugin for Embulk
2
+
3
+ Deserializes PHP-serialized strings and extracts values from them as new columns.
4
+
5
+ See also: [keichan34/php-serialize](https://github.com/keichan34/php-serialize)
6
+
7
+ ## Overview
8
+
9
+ * **Plugin type**: filter
10
+
11
+ ## Configuration
12
+
13
+ - **serialized_column**: name of the column that holds the PHP-serialized string (string, required)
14
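+ - **extract_fields**: keys to extract from the deserialized data, each given as `{name: ..., type: ...}`; every entry becomes a new output column (array, default: [])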
15
+ - **drop_serialized_column**: drop the serialized column from the output schema (boolean, default: false)
16
+
17
+ ## Example
18
+
19
+ ```yaml
20
+ filters:
21
+ - type: pherialize
22
+ serialized_column: serialized_data
23
+ drop_serialized_column: true
24
+ extract_fields:
25
+ - {name: id, type: long}
26
+ - {name: name, type: string}
27
+ out:
28
+ type: stdout
29
+ ```
30
+
31
+ ## Build
32
+
33
+ ```
34
+ $ rake
35
+ ```
@@ -0,0 +1,3 @@
1
+ require "bundler/gem_tasks"
2
+
3
+ task default: :build
@@ -0,0 +1,22 @@
1
+
2
# Gem specification for the embulk-filter-pherialize plugin.
Gem::Specification.new do |s|
  # --- Identity -----------------------------------------------------------
  s.name    = 'embulk-filter-pherialize'
  s.version = '0.0.1'

  # --- Descriptive metadata -----------------------------------------------
  s.summary     = 'Pherialize filter plugin for Embulk'
  s.description = 'Embulk plugin that deserialize PHP serialized strings to extract values as new column'
  s.authors     = ['cynipe']
  s.email       = ['cynipe@gmail.com']
  s.homepage    = 'https://github.com/cynipe/embulk-filter-pherialize'
  s.licenses    = ['MIT']

  # --- Packaged files -----------------------------------------------------
  # Everything tracked by git, plus any jars placed under classpath/.
  tracked_files  = `git ls-files`.split("\n")
  bundled_jars   = Dir.glob('classpath/*.jar')
  s.files        = tracked_files + bundled_jars
  s.test_files   = s.files.select { |path| path =~ %r{^(test|spec)/} }
  s.require_paths = ['lib']

  # --- Runtime dependencies -----------------------------------------------
  s.add_dependency 'k-php-serialize', ['~> 1.2.1']

  # --- Development dependencies (declared in this order) --------------------
  [
    ['embulk',  ['>= 0.8.8']],
    ['bundler', ['>= 1.10.6']],
    ['rake',    ['>= 10.0']]
  ].each do |gem_name, requirement|
    s.add_development_dependency gem_name, requirement
  end
  # pry is declared without a version constraint.
  s.add_development_dependency 'pry'
end
@@ -0,0 +1,56 @@
1
+ require 'php_serialize'
2
+
3
module Embulk
  module Filter
    # Embulk filter that deserializes a PHP-serialized string column with
    # PHP.unserialize and appends selected values from the result as new
    # output columns.
    class Pherialize < FilterPlugin
      Plugin.register_filter('pherialize', self)

      # Reads the plugin configuration and builds the output schema.
      #
      # The output schema consists of the input columns in index order (minus
      # the serialized column when `drop_serialized_column` is set), re-indexed
      # from 0, followed by one column per `extract_fields` entry. An extracted
      # field whose name collides with an input column name is emitted with a
      # leading underscore.
      #
      # @param config    plugin configuration, read through `config.param`
      # @param in_schema schema of the incoming pages
      # @yield [task, out_schema] the task hash and the list of output columns
      # @raise [ConfigError] when `serialized_column` names no input column
      def self.transaction(config, in_schema, &control)
        task = {
          'serialized_column' => config.param('serialized_column', :string),
          'extract_fields' => config.param('extract_fields', :array, default: []),
          'drop_serialized_column' => config.param('drop_serialized_column', :bool, default: false),
        }

        in_names = in_schema.names
        # Fail at configuration time instead of with a NoMethodError on nil
        # for every page once records start flowing through #add.
        unless in_names.include?(task['serialized_column'])
          raise ConfigError, "serialized_column '#{task['serialized_column']}' does not exist in the input schema"
        end

        kept = in_schema.sort_by(&:index).reject do |col|
          task['drop_serialized_column'] && col.name == task['serialized_column']
        end
        # Re-index copies so the caller's in_schema columns are left untouched.
        out_schema = kept.each_with_index.map do |col, i|
          col.dup.tap { |c| c.index = i }
        end

        offset = out_schema.size
        out_schema += task['extract_fields'].each_with_index.map do |f, i|
          # Prefix with "_" to avoid clashing with an existing input column name.
          name = in_names.include?(f['name']) ? "_#{f['name']}" : f['name']
          Column.new(offset + i, name, f['type'].to_sym)
        end
        yield(task, out_schema)
      end

      # Copies the task settings into instance variables for use in #add.
      def init
        @serialized_column = task['serialized_column']
        @extract_fields = task['extract_fields']
        @drop_serialized_column = task['drop_serialized_column']
      end

      # Nothing to release.
      def close
      end

      # Deserializes the configured column of every record in +page+ and emits
      # the record with the extracted values appended, in `extract_fields`
      # order. The serialized cell itself is removed from the record when
      # `drop_serialized_column` is set.
      #
      # A null cell, or a payload that deserializes to nil, yields nil for
      # every extracted field instead of aborting the whole run.
      #
      # @param page the incoming page; its schema is used to locate the column
      def add(page)
        target = page.schema.find { |s| s.name == @serialized_column }
        page.each do |record|
          serialized = @drop_serialized_column ? record.delete_at(target.index) : record[target.index]
          data = serialized.nil? ? nil : PHP.unserialize(serialized)
          result = @extract_fields.map { |f| data.nil? ? nil : data[f['name']] }
          page_builder.add(record + result)
        end
      end

      # Flushes the page builder once all pages have been added.
      def finish
        page_builder.finish
      end
    end
  end
end
metadata ADDED
@@ -0,0 +1,122 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: embulk-filter-pherialize
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - cynipe
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-04-11 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - "~>"
17
+ - !ruby/object:Gem::Version
18
+ version: 1.2.1
19
+ name: k-php-serialize
20
+ prerelease: false
21
+ type: :runtime
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: 1.2.1
27
+ - !ruby/object:Gem::Dependency
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: 0.8.8
33
+ name: embulk
34
+ prerelease: false
35
+ type: :development
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: 0.8.8
41
+ - !ruby/object:Gem::Dependency
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: 1.10.6
47
+ name: bundler
48
+ prerelease: false
49
+ type: :development
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: 1.10.6
55
+ - !ruby/object:Gem::Dependency
56
+ requirement: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: '10.0'
61
+ name: rake
62
+ prerelease: false
63
+ type: :development
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '10.0'
69
+ - !ruby/object:Gem::Dependency
70
+ requirement: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ version: '0'
75
+ name: pry
76
+ prerelease: false
77
+ type: :development
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ description: Embulk plugin that deserialize PHP serialized strings to extract values as new column
84
+ email:
85
+ - cynipe@gmail.com
86
+ executables: []
87
+ extensions: []
88
+ extra_rdoc_files: []
89
+ files:
90
+ - ".gitignore"
91
+ - ".ruby-version"
92
+ - Gemfile
93
+ - LICENSE.txt
94
+ - README.md
95
+ - Rakefile
96
+ - embulk-filter-pherialize.gemspec
97
+ - lib/embulk/filter/pherialize.rb
98
+ homepage: https://github.com/cynipe/embulk-filter-pherialize
99
+ licenses:
100
+ - MIT
101
+ metadata: {}
102
+ post_install_message:
103
+ rdoc_options: []
104
+ require_paths:
105
+ - lib
106
+ required_ruby_version: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ required_rubygems_version: !ruby/object:Gem::Requirement
112
+ requirements:
113
+ - - ">="
114
+ - !ruby/object:Gem::Version
115
+ version: '0'
116
+ requirements: []
117
+ rubyforge_project:
118
+ rubygems_version: 2.4.8
119
+ signing_key:
120
+ specification_version: 4
121
+ summary: Pherialize filter plugin for Embulk
122
+ test_files: []