embulk-parser-query_string 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +5 -0
- data/.travis.yml +12 -0
- data/CHANGELOG.md +13 -0
- data/Gemfile +2 -0
- data/LICENSE +13 -0
- data/LICENSE.txt +21 -0
- data/README.md +84 -0
- data/Rakefile +60 -0
- data/embulk-parser-query_string.gemspec +23 -0
- data/lib/embulk/guess/query_string.rb +39 -0
- data/lib/embulk/parser/query_string.rb +79 -0
- data/partial-config.yml +8 -0
- data/test/embulk/guess/test_query_string.rb +122 -0
- data/test/embulk/parser/test_query_string_plugin.rb +120 -0
- data/test/prepare_embulk.rb +16 -0
- data/test/run-test.rb +18 -0
- metadata +170 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 97fb67cc2b3cf2233e6e3fd248033c59edd58b70
|
4
|
+
data.tar.gz: 11879dcdad1b06b3ce746ce33c02816a8267f4c8
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: f89539141e02dcb74fbf93956d77bb94446aa4b7dcf67c14b7bc1c6ad3862a162c2fde1a5965e49ceea278da8a60e59babcd8d7f7eeb7482b76bc38e8608b096
|
7
|
+
data.tar.gz: 8e1de420daf02fb16f477d9aaa2f3d97fa9dbb7fca7eb86692337b76989a2867e28c603e2c03dfd5feff0c42e89c9e9cfe1c5cc0ad8fc232d2b0b890151f670e
|
data/.travis.yml
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
language: ruby
|
2
|
+
rvm:
|
3
|
+
- jruby-19mode
|
4
|
+
addons:
|
5
|
+
code_climate:
|
6
|
+
repo_token:
|
7
|
+
secure: "cSTIvR+haBzyXfAQGGxCjdnLM/iEWFJuVJFqQi0cqr78yZuhJ4A06tJhLIVd7uU+ZRa+JCT/kpD7zZbVc5BC2kmVJybLOXx+bR1SFgT1W2QVQJL7yU6pzPzpT4hr1VQFd8OIKgko8vTBGykXL804F0e/xHKv+FHGMh6TI2CfXqG8cpZpa4oCtnt48V3VUpszNpNuufvfTFJADpI9NvuyosbA3btcH7pY/vbjLTOAd0cSTjZ6Qufy9jV2KwSRyMHXs1m1paErKHrxL7aPxy+rBWqCVeyrU9Tx/Kw/DgJLP9dIPjFvcDHdaUKpcZAcaU87VYFPhcPW7bDIQoeaI7lfrPiBgQTgfZxeiL9vJ2oRUP9NhkhCF18ljhoqI9uTonuyv49zgyAXrLlJQu0cEdC4tW/qsEsB0b56MyctUm8Iq0gWIZO8uI961p/VzkjV6d3/n+QFTUIKnGqf7sRWfxqXFOP6SmlyZtbST4uzUDNw7MWuRRzqmEagQtnzjzmYXCrFpAwn2BG3tPkjrypmlBBjBKsPww/nkeTn25449UvYUwOKRPY3P0amXPdemubiy4jLCWTHUSxhszTrp85Jx6492z50qDo6wU6bw8MaMdrZbxChyNT6ulgbiBaQm/c1bUPNUyrT8ixyxc+/F+SFuILhIvclHjcAEAiRNAyV5LXhb+U="
|
8
|
+
notifications:
|
9
|
+
slack:
|
10
|
+
secure: "QRZ6zQxcRdN2XylIz+b46TbYIv7x5Ko7Y0Mi0aGbf7Wg1xfnS4X2FZKp8gF+RH1qjT6CYsQFUpPBhPVCMx0fP0ik5/gBrdngrj0htludvQ5CTK2oVegM1VC5NSn+gi8bt2F20Ode9fxU5OS1HDzNR9sjPz3W7miMUNDZoYPy77Om/CvmVkBbGyMkWnfwhEvwnL8JpBNpklDvrUUKznNsAcSbKQBMBAglFMds/OaHXCkK142zMbSlHMctch1VDAKhX9/6M3v8v6ZRheX68+VVCz3nTrTF0/lnYh3ig9ey3DNlgkZUqK2gmypnhTE6tpeibHdyVa8qOMNzyAgltu1qIhFHN390WMk9gZXEcvWG3m++PMAIKKtd4varbH1vdCeHOGUV7GRgarcfSMvJ54I9xc5GkUnAhjRKj8xDzYzhkXwApD8eqWb2RgGXUm6G4cKj0MeQ4WBqHr9FOW/4EJcZoztFi0YlP7KE+6xptBJU479KSsv31BwHrMfblObOOj58C7gUfdbTCByxng0u1axxvixL8zJwgHvFND3/4yoXoCCgm0DXBPbi6B5xpt8QmN6K4E5sMGrppTI7VwJqkcwkVD0EEeCedRNiWLTaxlKpLYxYnoXwEnpm9o2WXWa+PjkWeawsnJvV5IAt6oJhaK22VqJN86Macuu+nCdeF1/0TZU="
|
11
|
+
jdk:
|
12
|
+
- oraclejdk8
|
data/CHANGELOG.md
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
## 0.0.2 - 2015-07-08
|
2
|
+
|
3
|
+
This plugin has been renamed from "embulk-parser-query-string" to "embulk-parser-query_string".
|
4
|
+
|
5
|
+
* [maintenance] Fix example config in README and sample config [#7](https://github.com/treasure-data/embulk-parser-query_string/pull/7)
|
6
|
+
* [fixed] Decode line correctly [#6](https://github.com/treasure-data/embulk-parser-query_string/pull/6)
|
7
|
+
* [fixed] fall back to guess csv [#5](https://github.com/treasure-data/embulk-parser-query_string/pull/5)
|
8
|
+
* [enhancement] Error handling for parser [#4](https://github.com/treasure-data/embulk-parser-query_string/pull/4)
|
9
|
+
* [maintenance] Use underscore for plugin name [#3](https://github.com/treasure-data/embulk-parser-query_string/pull/3)
|
10
|
+
|
11
|
+
## 0.0.1 - 2015-07-07
|
12
|
+
|
13
|
+
The first release!!
|
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
Copyright 2015 Everyleaf Corporation
|
2
|
+
|
3
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
you may not use this file except in compliance with the License.
|
5
|
+
You may obtain a copy of the License at
|
6
|
+
|
7
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
|
9
|
+
Unless required by applicable law or agreed to in writing, software
|
10
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
See the License for the specific language governing permissions and
|
13
|
+
limitations under the License.
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
|
2
|
+
MIT License
|
3
|
+
|
4
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
5
|
+
a copy of this software and associated documentation files (the
|
6
|
+
"Software"), to deal in the Software without restriction, including
|
7
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
8
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
9
|
+
permit persons to whom the Software is furnished to do so, subject to
|
10
|
+
the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be
|
13
|
+
included in all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
17
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
18
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
19
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
20
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
21
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
[![Build Status](https://travis-ci.org/treasure-data/embulk-parser-query_string.svg)](https://travis-ci.org/treasure-data/embulk-parser-query_string)
|
2
|
+
[![Code Climate](https://codeclimate.com/github/treasure-data/embulk-parser-query_string/badges/gpa.svg)](https://codeclimate.com/github/treasure-data/embulk-parser-query_string)
|
3
|
+
[![Test Coverage](https://codeclimate.com/github/treasure-data/embulk-parser-query_string/badges/coverage.svg)](https://codeclimate.com/github/treasure-data/embulk-parser-query_string/coverage)
|
4
|
+
|
5
|
+
# Query String parser plugin for [Embulk](http://www.embulk.org)
|
6
|
+
|
7
|
+
Transforms each `key=value&key2=value2` line into `{key: "value", key2: "value2"}` (HTTP query string to Hash).
|
8
|
+
|
9
|
+
Currently, this plugin supports only the minimal case; the following edge cases are not supported.
|
10
|
+
|
11
|
+
- Duplicated key (e.g. `key=1&key=2`)
|
12
|
+
- Array parameter (e.g. `key[]=1&key[]=2`)
|
13
|
+
|
14
|
+
## Overview
|
15
|
+
|
16
|
+
* **Plugin type**: parser
|
17
|
+
* **Guess supported**: yes
|
18
|
+
|
19
|
+
## Configuration
|
20
|
+
|
21
|
+
- **strip_quote**: Set this to true to strip the surrounding quotes when the file contains quoted lines such as `"foo=FOO&bar=BAR"`. (bool, default: true)
|
22
|
+
- **strip_whitespace**: Strip leading and trailing whitespace before parsing each line, so that indented lines such as ' foo=FOO' are parsed correctly. (bool, default: true)
|
23
|
+
|
24
|
+
## Example
|
25
|
+
|
26
|
+
Suppose you have a text file (`target_file.txt`) like the following:
|
27
|
+
|
28
|
+
```text
|
29
|
+
"user_id=42&some_param=ABC"
|
30
|
+
"user_id=43&some_param=EFG"
|
31
|
+
"user_id=44&some_param=XYZ"
|
32
|
+
```
|
33
|
+
|
34
|
+
And you have a `partial-config.yml` like the following:
|
35
|
+
|
36
|
+
```yaml
|
37
|
+
in:
|
38
|
+
type: file
|
39
|
+
path_prefix: ./target_file
|
40
|
+
parser:
|
41
|
+
strip_quote: true
|
42
|
+
strip_whitespace: true
|
43
|
+
exec: {}
|
44
|
+
out: {type: stdout}
|
45
|
+
```
|
46
|
+
|
47
|
+
Run `embulk guess`.
|
48
|
+
|
49
|
+
```
|
50
|
+
$ embulk guess -g query_string partial-config.yml -o guessed.yml
|
51
|
+
```
|
52
|
+
|
53
|
+
You will get a `guessed.yml` like the following:
|
54
|
+
|
55
|
+
```yaml
|
56
|
+
in:
|
57
|
+
type: file
|
58
|
+
path_prefix: ./target_file
|
59
|
+
parser:
|
60
|
+
strip_quote: true
|
61
|
+
strip_whitespace: true
|
62
|
+
charset: ISO-8859-2
|
63
|
+
newline: CRLF
|
64
|
+
type: query_string
|
65
|
+
schema:
|
66
|
+
- {name: user_id, type: long}
|
67
|
+
- {name: some_param, type: string}
|
68
|
+
exec: {}
|
69
|
+
out: {type: stdout}
|
70
|
+
```
|
71
|
+
|
72
|
+
Finally, run `embulk run` with the generated `guessed.yml`.
|
73
|
+
|
74
|
+
```
|
75
|
+
$ embulk run guessed.yml
|
76
|
+
```
|
77
|
+
|
78
|
+
You can see the parsed records on STDOUT.
|
79
|
+
|
80
|
+
## Install plugin
|
81
|
+
|
82
|
+
```
|
83
|
+
$ embulk gem install embulk-parser-query_string
|
84
|
+
```
|
data/Rakefile
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
2
|
+
require "json"
|
3
|
+
|
4
|
+
task default: :test
|
5
|
+
|
6
|
+
desc "Run tests"
|
7
|
+
task :test do
|
8
|
+
ruby("test/run-test.rb", "--use-color=yes")
|
9
|
+
end
|
10
|
+
|
11
|
+
namespace :release do
|
12
|
+
desc "Add header of now version release to ChangeLog and bump up version"
|
13
|
+
task :prepare do
|
14
|
+
root_dir = Pathname.new(File.expand_path("../", __FILE__))
|
15
|
+
changelog_file = root_dir.join("CHANGELOG.md")
|
16
|
+
gemspec_file = root_dir.join("embulk-parser-query_string.gemspec")
|
17
|
+
|
18
|
+
system("git fetch origin")
|
19
|
+
|
20
|
+
# detect merged PR
|
21
|
+
old_version = gemspec_file.read[/spec\.version += *"([0-9]+\.[0-9]+\.[0-9]+)"/, 1]
|
22
|
+
pr_numbers = `git log v#{old_version}..origin/master --oneline`.scan(/#[0-9]+/)
|
23
|
+
|
24
|
+
if !$?.success? || pr_numbers.empty?
|
25
|
+
puts "Detecting PR failed. Please confirm if any PR were merged after the latest release."
|
26
|
+
exit(false)
|
27
|
+
end
|
28
|
+
|
29
|
+
# Generate new version
|
30
|
+
major, minor, patch = old_version.split(".").map(&:to_i)
|
31
|
+
new_version = "#{major}.#{minor}.#{patch + 1}"
|
32
|
+
|
33
|
+
# Update ChangeLog
|
34
|
+
pr_descriptions = pr_numbers.map do |number|
|
35
|
+
body = open("https://api.github.com/repos/treasure-data/embulk-parser-query_string/issues/#{number.gsub("#", "")}").read
|
36
|
+
payload = JSON.parse(body)
|
37
|
+
"* [] #{payload["title"]} [#{number}](https://github.com/treasure-data/embulk-parser-query_string/pull/#{number.gsub('#', '')})"
|
38
|
+
end.join("\n")
|
39
|
+
|
40
|
+
new_changelog = <<-HEADER
|
41
|
+
## #{new_version} - #{Time.now.strftime("%Y-%m-%d")}
|
42
|
+
#{pr_descriptions}
|
43
|
+
|
44
|
+
#{changelog_file.read.chomp}
|
45
|
+
HEADER
|
46
|
+
|
47
|
+
File.open(changelog_file, "w") {|f| f.write(new_changelog) }
|
48
|
+
|
49
|
+
# Update the version in the gemspec
|
50
|
+
old_content = gemspec_file.read
|
51
|
+
File.open(gemspec_file, "w") do |f|
|
52
|
+
f.write old_content.gsub(/(spec\.version += *)".*?"/, %Q!\\1"#{new_version}"!)
|
53
|
+
end
|
54
|
+
|
55
|
+
# Update Gemfile.lock
|
56
|
+
system("bundle install")
|
57
|
+
|
58
|
+
puts "ChangeLog, version and Gemfile.lock were updated. New version is #{new_version}."
|
59
|
+
end
|
60
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
|
2
|
+
Gem::Specification.new do |spec|
|
3
|
+
spec.name = "embulk-parser-query_string"
|
4
|
+
spec.version = "0.0.2"
|
5
|
+
spec.authors = ["yoshihara", "uu59"]
|
6
|
+
spec.summary = "Query String parser plugin for Embulk"
|
7
|
+
spec.description = "Parses Query String files read by other file input plugins."
|
8
|
+
spec.email = ["h.yoshihara@everyleaf.com", "k@uu59.org"]
|
9
|
+
spec.licenses = ["Apache2"]
|
10
|
+
spec.homepage = "https://github.com/treasure-data/embulk-parser-query_string"
|
11
|
+
|
12
|
+
spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
|
13
|
+
spec.test_files = spec.files.grep(%r{^(test|spec)/})
|
14
|
+
spec.require_paths = ["lib"]
|
15
|
+
|
16
|
+
spec.add_development_dependency 'embulk', [">= 0.6.13", "< 1.0"]
|
17
|
+
spec.add_development_dependency 'bundler', ['~> 1.0']
|
18
|
+
spec.add_development_dependency 'rake', ['>= 10.0']
|
19
|
+
spec.add_development_dependency 'pry'
|
20
|
+
spec.add_development_dependency 'test-unit'
|
21
|
+
spec.add_development_dependency 'test-unit-rr'
|
22
|
+
spec.add_development_dependency 'codeclimate-test-reporter'
|
23
|
+
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require "embulk/parser/query_string"
|
2
|
+
|
3
|
+
module Embulk
|
4
|
+
module Guess
|
5
|
+
# $ embulk guess -g "query_string" partial-config.yml
|
6
|
+
|
7
|
+
class QueryString < LineGuessPlugin
|
8
|
+
Plugin.register_guess("query_string", self)
|
9
|
+
|
10
|
+
def guess_lines(config, sample_lines)
|
11
|
+
return {} unless config.fetch("parser", {}).fetch("type", "query_string") == "query_string"
|
12
|
+
|
13
|
+
options = {
|
14
|
+
strip_quote: config.param("strip_quote", :bool, default: true),
|
15
|
+
strip_whitespace: config.param("strip_whitespace", :bool, default: true)
|
16
|
+
}
|
17
|
+
records = sample_lines.map do |line|
|
18
|
+
Parser::QueryString.parse(line, options) || {}
|
19
|
+
end
|
20
|
+
format = records.inject({}) do |result, record|
|
21
|
+
record.each_pair do |key, value|
|
22
|
+
(result[key] ||= []) << value
|
23
|
+
end
|
24
|
+
result
|
25
|
+
end
|
26
|
+
guessed = {type: "query_string", schema: []}
|
27
|
+
format.each_pair do |key, values|
|
28
|
+
if values.any? {|value| value.match(/[^0-9]/) }
|
29
|
+
guessed[:schema] << {name: key, type: :string}
|
30
|
+
else
|
31
|
+
guessed[:schema] << {name: key, type: :long}
|
32
|
+
end
|
33
|
+
end
|
34
|
+
return {"parser" => guessed}
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
end
|
39
|
+
end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
require "uri"
|
2
|
+
|
3
|
+
module Embulk
|
4
|
+
module Parser
|
5
|
+
|
6
|
+
class QueryString < ParserPlugin
|
7
|
+
Plugin.register_parser("query_string", self)
|
8
|
+
|
9
|
+
def self.transaction(config, &control)
|
10
|
+
decoder_task = config.load_config(Java::LineDecoder::DecoderTask)
|
11
|
+
|
12
|
+
task = {
|
13
|
+
decoder: DataSource.from_java(decoder_task.dump),
|
14
|
+
strip_quote: config.param("strip_quote", :bool, default: true),
|
15
|
+
strip_whitespace: config.param("strip_whitespace", :bool, default: true),
|
16
|
+
}
|
17
|
+
|
18
|
+
columns = []
|
19
|
+
schema = config.param(:schema, :array, default: [])
|
20
|
+
schema.each do |column|
|
21
|
+
name = column["name"]
|
22
|
+
type = column["type"].to_sym
|
23
|
+
|
24
|
+
columns << Column.new(nil, name, type)
|
25
|
+
end
|
26
|
+
|
27
|
+
yield(task, columns)
|
28
|
+
end
|
29
|
+
|
30
|
+
def init
|
31
|
+
@options = {
|
32
|
+
strip_quote: task[:strip_quote],
|
33
|
+
strip_whitespace: task[:strip_whitespace],
|
34
|
+
}
|
35
|
+
|
36
|
+
@decoder = task.param(:decoder, :hash).load_task(Java::LineDecoder::DecoderTask)
|
37
|
+
end
|
38
|
+
|
39
|
+
def run(file_input)
|
40
|
+
decoder = Java::LineDecoder.new(file_input.instance_variable_get(:@java_file_input), @decoder)
|
41
|
+
|
42
|
+
while decoder.nextFile
|
43
|
+
while line = decoder.poll
|
44
|
+
process_line(line)
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
page_builder.finish
|
49
|
+
end
|
50
|
+
|
51
|
+
def self.parse(line, options = {})
|
52
|
+
line.chomp!
|
53
|
+
line.strip! if options[:strip_whitespace]
|
54
|
+
if options[:strip_quote]
|
55
|
+
line = line[/\A(?:["'])?(.*?)(?:["'])?\z/, 1]
|
56
|
+
end
|
57
|
+
|
58
|
+
begin
|
59
|
+
Hash[URI.decode_www_form(line)]
|
60
|
+
rescue ArgumentError
|
61
|
+
nil
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
private
|
66
|
+
|
67
|
+
def process_line(line)
|
68
|
+
record = self.class.parse(line, @options)
|
69
|
+
|
70
|
+
return unless record
|
71
|
+
|
72
|
+
records = schema.map do |column|
|
73
|
+
record[column.name]
|
74
|
+
end
|
75
|
+
page_builder.add(records)
|
76
|
+
end
|
77
|
+
end
|
78
|
+
end
|
79
|
+
end
|
data/partial-config.yml
ADDED
@@ -0,0 +1,122 @@
|
|
1
|
+
require "prepare_embulk"
|
2
|
+
require "embulk/guess/query_string"
|
3
|
+
require "embulk/data_source"
|
4
|
+
|
5
|
+
module Embulk
|
6
|
+
module Guess
|
7
|
+
class QueryStringTest < Test::Unit::TestCase
|
8
|
+
class TestGuessLines < self
|
9
|
+
data do
|
10
|
+
{
|
11
|
+
same_keys: [sample_lines_with_same_keys, schema_with_same_keys],
|
12
|
+
different_keys: [sample_lines_with_different_keys, schema_with_different_keys],
|
13
|
+
invalid: [sample_lines_with_invalid, schema_with_invalid],
|
14
|
+
|
15
|
+
}
|
16
|
+
end
|
17
|
+
|
18
|
+
def test_schema(data)
|
19
|
+
sample_lines, schema = data
|
20
|
+
actual = QueryString.new.guess_lines(config, sample_lines)
|
21
|
+
expected = {
|
22
|
+
"parser" => {
|
23
|
+
type: "query_string",
|
24
|
+
schema: schema
|
25
|
+
}
|
26
|
+
}
|
27
|
+
assert_equal(expected, actual)
|
28
|
+
end
|
29
|
+
|
30
|
+
data do
|
31
|
+
valid_schema = {
|
32
|
+
"parser" => {
|
33
|
+
type: "query_string",
|
34
|
+
schema: schema_with_same_keys,
|
35
|
+
}
|
36
|
+
}
|
37
|
+
|
38
|
+
{
|
39
|
+
"query_string" => ["query_string", valid_schema],
|
40
|
+
"other" => ["other", {}],
|
41
|
+
}
|
42
|
+
end
|
43
|
+
|
44
|
+
def test_type(data)
|
45
|
+
type, expected = data
|
46
|
+
sample_lines = self.class.sample_lines_with_same_keys
|
47
|
+
config = DataSource[{parser: {type: type}}]
|
48
|
+
|
49
|
+
actual = QueryString.new.guess_lines(config, sample_lines)
|
50
|
+
assert_equal(expected, actual)
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
private
|
55
|
+
|
56
|
+
class << self
|
57
|
+
def sample_lines_with_same_keys
|
58
|
+
[
|
59
|
+
%Q(foo=1&bar=vv&baz=3),
|
60
|
+
%Q(foo=2&bar=ss&baz=a),
|
61
|
+
]
|
62
|
+
end
|
63
|
+
|
64
|
+
def schema_with_same_keys
|
65
|
+
[
|
66
|
+
{name: "foo", type: :long},
|
67
|
+
{name: "bar", type: :string},
|
68
|
+
{name: "baz", type: :string},
|
69
|
+
]
|
70
|
+
end
|
71
|
+
|
72
|
+
def sample_lines_with_different_keys
|
73
|
+
[
|
74
|
+
%Q(foo=1&bar=vv&baz=3&hoge=999),
|
75
|
+
%Q(foo=2&bar=ss&baz=a&xxx=ABC),
|
76
|
+
]
|
77
|
+
end
|
78
|
+
|
79
|
+
def schema_with_different_keys
|
80
|
+
[
|
81
|
+
{name: "foo", type: :long},
|
82
|
+
{name: "bar", type: :string},
|
83
|
+
{name: "baz", type: :string},
|
84
|
+
{name: "hoge", type: :long},
|
85
|
+
{name: "xxx", type: :string},
|
86
|
+
]
|
87
|
+
end
|
88
|
+
|
89
|
+
def sample_lines_with_invalid
|
90
|
+
[
|
91
|
+
%Q(foo=1&bar=vv&baz=3),
|
92
|
+
%Q(this=line=is=invalid),
|
93
|
+
%Q(foo=2&bar=ss&baz=a),
|
94
|
+
]
|
95
|
+
end
|
96
|
+
|
97
|
+
def schema_with_invalid
|
98
|
+
schema_with_same_keys
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
102
|
+
def task
|
103
|
+
{
|
104
|
+
strip_quote: true,
|
105
|
+
strip_whitespace: true,
|
106
|
+
schema: columns,
|
107
|
+
}
|
108
|
+
end
|
109
|
+
|
110
|
+
def columns
|
111
|
+
[
|
112
|
+
{"name" => "foo", "type" => "string"},
|
113
|
+
{"name" => "bar", "type" => "string"},
|
114
|
+
]
|
115
|
+
end
|
116
|
+
|
117
|
+
def config
|
118
|
+
DataSource[task.to_a]
|
119
|
+
end
|
120
|
+
end
|
121
|
+
end
|
122
|
+
end
|
@@ -0,0 +1,120 @@
|
|
1
|
+
require "prepare_embulk"
|
2
|
+
require "embulk/parser/query_string"
|
3
|
+
require "embulk/data_source"
|
4
|
+
|
5
|
+
module Embulk
|
6
|
+
module Parser
|
7
|
+
class QueryStringPluginTest < Test::Unit::TestCase
|
8
|
+
class TestParse < self
|
9
|
+
def test_without_options
|
10
|
+
result = QueryString.parse(line)
|
11
|
+
assert_equal(expected, result)
|
12
|
+
end
|
13
|
+
|
14
|
+
def test_with_strip_quote
|
15
|
+
result = QueryString.parse(quoted_line, strip_quote: true)
|
16
|
+
assert_equal(expected, result)
|
17
|
+
end
|
18
|
+
|
19
|
+
def test_with_strip_whitespace
|
20
|
+
result = QueryString.parse(indented_line, strip_whitespace: true)
|
21
|
+
assert_equal(expected, result)
|
22
|
+
end
|
23
|
+
|
24
|
+
def test_with_invalid
|
25
|
+
result = QueryString.parse(invalid_line)
|
26
|
+
assert_nil(result)
|
27
|
+
end
|
28
|
+
|
29
|
+
private
|
30
|
+
|
31
|
+
def expected
|
32
|
+
{"foo" => "FOO", "bar" => "3"}
|
33
|
+
end
|
34
|
+
|
35
|
+
def line
|
36
|
+
%Q(foo=FOO&bar=3)
|
37
|
+
end
|
38
|
+
|
39
|
+
def quoted_line
|
40
|
+
%Q("#{line}")
|
41
|
+
end
|
42
|
+
|
43
|
+
def indented_line
|
44
|
+
%Q( #{line})
|
45
|
+
end
|
46
|
+
|
47
|
+
def invalid_line
|
48
|
+
"invalid=www=form"
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
class TestProcessBuffer < self
|
53
|
+
def test_process_line
|
54
|
+
records.each do |record|
|
55
|
+
mock(page_builder).add(record.values)
|
56
|
+
end
|
57
|
+
|
58
|
+
plugin.send(:process_line, line)
|
59
|
+
end
|
60
|
+
|
61
|
+
private
|
62
|
+
|
63
|
+
def records
|
64
|
+
[
|
65
|
+
{"foo" => "FOO", "bar" => "1"},
|
66
|
+
]
|
67
|
+
end
|
68
|
+
|
69
|
+
def line
|
70
|
+
"foo=FOO&bar=1"
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
def test_transaction
|
75
|
+
QueryString.transaction(config) do |actual_task, actual_columns|
|
76
|
+
t = task.dup
|
77
|
+
t.delete(:schema)
|
78
|
+
assert_equal(t, actual_task)
|
79
|
+
assert_equal(schema, actual_columns)
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
private
|
84
|
+
|
85
|
+
def plugin
|
86
|
+
@plugin ||= QueryString.new(DataSource[task], schema, page_builder)
|
87
|
+
end
|
88
|
+
|
89
|
+
def page_builder
|
90
|
+
@page_builder ||= Object.new
|
91
|
+
end
|
92
|
+
|
93
|
+
def task
|
94
|
+
{
|
95
|
+
decoder: {"Charset" => "UTF-8", "Newline" => "CRLF"},
|
96
|
+
strip_quote: true,
|
97
|
+
strip_whitespace: true,
|
98
|
+
schema: columns,
|
99
|
+
}
|
100
|
+
end
|
101
|
+
|
102
|
+
def columns
|
103
|
+
[
|
104
|
+
{"name" => "foo", "type" => "string"},
|
105
|
+
{"name" => "bar", "type" => "string"},
|
106
|
+
]
|
107
|
+
end
|
108
|
+
|
109
|
+
def schema
|
110
|
+
columns.map do |column|
|
111
|
+
Column.new(nil, column["name"], column["type"].to_sym)
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
115
|
+
def config
|
116
|
+
DataSource[task.to_a]
|
117
|
+
end
|
118
|
+
end
|
119
|
+
end
|
120
|
+
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require "embulk/command/embulk_run"
|
2
|
+
|
3
|
+
classpath_dir = Embulk.home("classpath")
|
4
|
+
jars = Dir.entries(classpath_dir).select{|f| f =~ /\.jar$/ }.sort
|
5
|
+
jars.each do |jar|
|
6
|
+
require File.join(classpath_dir, jar)
|
7
|
+
end
|
8
|
+
|
9
|
+
props = java.util.Properties.new
|
10
|
+
props.setProperty("embulk.use_global_ruby_runtime", "true")
|
11
|
+
|
12
|
+
bootstrap_model_manager = org.embulk.config.ModelManager.new(nil, com.fasterxml.jackson.databind.ObjectMapper.new)
|
13
|
+
system_config = org.embulk.config.ConfigLoader.new(bootstrap_model_manager).fromPropertiesYamlLiteral(props, "embulk.")
|
14
|
+
org.embulk.EmbulkService.new(system_config).injector.getInstance(java.lang.Class.forName('org.jruby.embed.ScriptingContainer'))
|
15
|
+
|
16
|
+
require "embulk"
|
data/test/run-test.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
base_dir = File.expand_path(File.join(File.dirname(__FILE__), ".."))
|
4
|
+
lib_dir = File.join(base_dir, "lib")
|
5
|
+
test_dir = File.join(base_dir, "test")
|
6
|
+
|
7
|
+
require "test-unit"
|
8
|
+
require "test/unit/rr"
|
9
|
+
require "codeclimate-test-reporter"
|
10
|
+
|
11
|
+
$LOAD_PATH.unshift(lib_dir)
|
12
|
+
$LOAD_PATH.unshift(test_dir)
|
13
|
+
|
14
|
+
ENV["TEST_UNIT_MAX_DIFF_TARGET_STRING_SIZE"] ||= "5000"
|
15
|
+
|
16
|
+
CodeClimate::TestReporter.start
|
17
|
+
|
18
|
+
exit Test::Unit::AutoRunner.run(true, test_dir)
|
metadata
ADDED
@@ -0,0 +1,170 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: embulk-parser-query_string
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.2
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- yoshihara
|
8
|
+
- uu59
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2015-07-08 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - '>='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 0.6.13
|
20
|
+
- - <
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: '1.0'
|
23
|
+
name: embulk
|
24
|
+
prerelease: false
|
25
|
+
type: :development
|
26
|
+
version_requirements: !ruby/object:Gem::Requirement
|
27
|
+
requirements:
|
28
|
+
- - '>='
|
29
|
+
- !ruby/object:Gem::Version
|
30
|
+
version: 0.6.13
|
31
|
+
- - <
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.0'
|
34
|
+
- !ruby/object:Gem::Dependency
|
35
|
+
requirement: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - ~>
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: '1.0'
|
40
|
+
name: bundler
|
41
|
+
prerelease: false
|
42
|
+
type: :development
|
43
|
+
version_requirements: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ~>
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '1.0'
|
48
|
+
- !ruby/object:Gem::Dependency
|
49
|
+
requirement: !ruby/object:Gem::Requirement
|
50
|
+
requirements:
|
51
|
+
- - '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '10.0'
|
54
|
+
name: rake
|
55
|
+
prerelease: false
|
56
|
+
type: :development
|
57
|
+
version_requirements: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '10.0'
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
requirement: !ruby/object:Gem::Requirement
|
64
|
+
requirements:
|
65
|
+
- - '>='
|
66
|
+
- !ruby/object:Gem::Version
|
67
|
+
version: '0'
|
68
|
+
name: pry
|
69
|
+
prerelease: false
|
70
|
+
type: :development
|
71
|
+
version_requirements: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - '>='
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
- !ruby/object:Gem::Dependency
|
77
|
+
requirement: !ruby/object:Gem::Requirement
|
78
|
+
requirements:
|
79
|
+
- - '>='
|
80
|
+
- !ruby/object:Gem::Version
|
81
|
+
version: '0'
|
82
|
+
name: test-unit
|
83
|
+
prerelease: false
|
84
|
+
type: :development
|
85
|
+
version_requirements: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - '>='
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
- !ruby/object:Gem::Dependency
|
91
|
+
requirement: !ruby/object:Gem::Requirement
|
92
|
+
requirements:
|
93
|
+
- - '>='
|
94
|
+
- !ruby/object:Gem::Version
|
95
|
+
version: '0'
|
96
|
+
name: test-unit-rr
|
97
|
+
prerelease: false
|
98
|
+
type: :development
|
99
|
+
version_requirements: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - '>='
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
- !ruby/object:Gem::Dependency
|
105
|
+
requirement: !ruby/object:Gem::Requirement
|
106
|
+
requirements:
|
107
|
+
- - '>='
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: '0'
|
110
|
+
name: codeclimate-test-reporter
|
111
|
+
prerelease: false
|
112
|
+
type: :development
|
113
|
+
version_requirements: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - '>='
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
description: Parses Query String files read by other file input plugins.
|
119
|
+
email:
|
120
|
+
- h.yoshihara@everyleaf.com
|
121
|
+
- k@uu59.org
|
122
|
+
executables: []
|
123
|
+
extensions: []
|
124
|
+
extra_rdoc_files: []
|
125
|
+
files:
|
126
|
+
- .gitignore
|
127
|
+
- .travis.yml
|
128
|
+
- CHANGELOG.md
|
129
|
+
- Gemfile
|
130
|
+
- LICENSE
|
131
|
+
- LICENSE.txt
|
132
|
+
- README.md
|
133
|
+
- Rakefile
|
134
|
+
- embulk-parser-query_string.gemspec
|
135
|
+
- lib/embulk/guess/query_string.rb
|
136
|
+
- lib/embulk/parser/query_string.rb
|
137
|
+
- partial-config.yml
|
138
|
+
- test/embulk/guess/test_query_string.rb
|
139
|
+
- test/embulk/parser/test_query_string_plugin.rb
|
140
|
+
- test/prepare_embulk.rb
|
141
|
+
- test/run-test.rb
|
142
|
+
homepage: https://github.com/treasure-data/embulk-parser-query_string
|
143
|
+
licenses:
|
144
|
+
- Apache2
|
145
|
+
metadata: {}
|
146
|
+
post_install_message:
|
147
|
+
rdoc_options: []
|
148
|
+
require_paths:
|
149
|
+
- lib
|
150
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
151
|
+
requirements:
|
152
|
+
- - '>='
|
153
|
+
- !ruby/object:Gem::Version
|
154
|
+
version: '0'
|
155
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
156
|
+
requirements:
|
157
|
+
- - '>='
|
158
|
+
- !ruby/object:Gem::Version
|
159
|
+
version: '0'
|
160
|
+
requirements: []
|
161
|
+
rubyforge_project:
|
162
|
+
rubygems_version: 2.4.6
|
163
|
+
signing_key:
|
164
|
+
specification_version: 4
|
165
|
+
summary: Query String parser plugin for Embulk
|
166
|
+
test_files:
|
167
|
+
- test/embulk/guess/test_query_string.rb
|
168
|
+
- test/embulk/parser/test_query_string_plugin.rb
|
169
|
+
- test/prepare_embulk.rb
|
170
|
+
- test/run-test.rb
|