embulk-parser-query_string 0.0.3 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +3 -0
- data/README.md +1 -0
- data/embulk-parser-query_string.gemspec +1 -1
- data/lib/embulk/guess/query_string.rb +4 -2
- data/lib/embulk/parser/query_string.rb +4 -0
- data/partial-config.yml +2 -1
- data/test/embulk/guess/test_query_string.rb +5 -3
- data/test/embulk/parser/test_query_string_plugin.rb +14 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 482d53ac57ad5d31f0149507090deeb010743c45
|
4
|
+
data.tar.gz: 4fac1b68e078caf3f88e763ce7af0deceb0eeff6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f06bd83e24c40a98d38d343e4d3af7febfc761f39fab704538b9d8a45046219a1d1ffa2c0d49d09fe8bce07ba11f67c301405425921f191756333f80a6e96f4b
|
7
|
+
data.tar.gz: fbb0c6bd32fa340e4bb0bbb50c17a5f8a3265ea37af262679259f32ebe3571bbeb669420eae1ef47d16a7c279ef99059bcc71405b6bfc29d995f2a549d305396
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
## 0.1.0 - 2015-07-14
|
2
|
+
* [enhancement] Add capture option [#11](https://github.com/treasure-data/embulk-parser-query_string/pull/11)
|
3
|
+
|
1
4
|
## 0.0.3 - 2015-07-08
|
2
5
|
|
3
6
|
* [enhancement] Support embulk 0.6.10 (backward compatibility) [#9](https://github.com/treasure-data/embulk-parser-query_string/pull/9)
|
data/README.md
CHANGED
@@ -22,6 +22,7 @@ Required Embulk version >= 0.6.10.
|
|
22
22
|
|
23
23
|
- **strip_quote**: Set to true to strip the surrounding quotes when the file contains quoted lines such as `"foo=FOO&bar=BAR"`. (bool, default: true)
|
24
24
|
- **strip_whitespace**: Strip leading and trailing whitespace before parsing, so that indented lines such as ' foo=FOO' are parsed correctly. (bool, default: true)
|
25
|
+
- **capture**: Regexp used to extract the query string from each line. The first capture group (a.k.a. `$1`) is used as the text to parse. See also [partial-config.yml](./partial-config.yml) (string, default: nil)
|
25
26
|
|
26
27
|
## Example
|
27
28
|
|
data/embulk-parser-query_string.gemspec
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
|
2
2
|
Gem::Specification.new do |spec|
|
3
3
|
spec.name = "embulk-parser-query_string"
|
4
|
-
spec.version = "0.0.3"
|
4
|
+
spec.version = "0.1.0"
|
5
5
|
spec.authors = ["yoshihara", "uu59"]
|
6
6
|
spec.summary = "Query String parser plugin for Embulk"
|
7
7
|
spec.description = "Parses Query String files read by other file input plugins."
|
data/lib/embulk/guess/query_string.rb
CHANGED
@@ -10,9 +10,11 @@ module Embulk
|
|
10
10
|
def guess_lines(config, sample_lines)
|
11
11
|
return {} unless config.fetch("parser", {}).fetch("type", "query_string") == "query_string"
|
12
12
|
|
13
|
+
parser_config = config.param("parser", :hash)
|
13
14
|
options = {
|
14
|
-
strip_quote:
|
15
|
-
strip_whitespace:
|
15
|
+
strip_quote: parser_config.param("strip_quote", :bool, default: true),
|
16
|
+
strip_whitespace: parser_config.param("strip_whitespace", :bool, default: true),
|
17
|
+
capture: parser_config.param("capture", :string, default: nil)
|
16
18
|
}
|
17
19
|
records = sample_lines.map do |line|
|
18
20
|
Parser::QueryString.parse(line, options) || {}
|
data/lib/embulk/parser/query_string.rb
CHANGED
@@ -49,6 +49,10 @@ module Embulk
|
|
49
49
|
end
|
50
50
|
|
51
51
|
def self.parse(line, options = {})
|
52
|
+
if options[:capture]
|
53
|
+
line = line.match(options[:capture]).to_a[1]
|
54
|
+
# TODO: detect incorrect regexp given
|
55
|
+
end
|
52
56
|
line.chomp!
|
53
57
|
line.strip! if options[:strip_whitespace]
|
54
58
|
if options[:strip_quote]
|
data/partial-config.yml
CHANGED
@@ -1,8 +1,9 @@
|
|
1
1
|
in:
|
2
2
|
type: file
|
3
|
-
path_prefix: ./
|
3
|
+
path_prefix: ./target_file
|
4
4
|
parser:
|
5
5
|
strip_quote: true
|
6
6
|
strip_whitespace: true
|
7
|
+
capture: 'www01: (".*?")' # sample regexp to capture query string from such line -> www01: "uid=123&s=foo"
|
7
8
|
exec: {}
|
8
9
|
out: {type: stdout}
|
data/test/embulk/parser/test_query_string_plugin.rb
CHANGED
@@ -21,6 +21,16 @@ module Embulk
|
|
21
21
|
assert_equal(expected, result)
|
22
22
|
end
|
23
23
|
|
24
|
+
def test_with_capture
|
25
|
+
result = QueryString.parse(indented_line, capture: /^ *(.*)$/)
|
26
|
+
assert_equal(expected, result)
|
27
|
+
end
|
28
|
+
|
29
|
+
def test_with_capture_and_quote
|
30
|
+
result = QueryString.parse(complex_line, strip_quote: true, capture: /^[^"]*(".*?")$/)
|
31
|
+
assert_equal(expected, result)
|
32
|
+
end
|
33
|
+
|
24
34
|
def test_with_invalid
|
25
35
|
result = QueryString.parse(invalid_line)
|
26
36
|
assert_nil(result)
|
@@ -44,6 +54,10 @@ module Embulk
|
|
44
54
|
%Q( #{line})
|
45
55
|
end
|
46
56
|
|
57
|
+
def complex_line
|
58
|
+
%Q(Jul 11 11:22:33 ec2-instance-001 : "#{line}")
|
59
|
+
end
|
60
|
+
|
47
61
|
def invalid_line
|
48
62
|
"invalid=www=form"
|
49
63
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-parser-query_string
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshihara
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2015-07-08 00:00:00.000000000 Z
|
12
|
+
date: 2015-07-14 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|