embulk-parser-query_string 0.0.3 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +3 -0
- data/README.md +1 -0
- data/embulk-parser-query_string.gemspec +1 -1
- data/lib/embulk/guess/query_string.rb +4 -2
- data/lib/embulk/parser/query_string.rb +4 -0
- data/partial-config.yml +2 -1
- data/test/embulk/guess/test_query_string.rb +5 -3
- data/test/embulk/parser/test_query_string_plugin.rb +14 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 482d53ac57ad5d31f0149507090deeb010743c45
|
4
|
+
data.tar.gz: 4fac1b68e078caf3f88e763ce7af0deceb0eeff6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f06bd83e24c40a98d38d343e4d3af7febfc761f39fab704538b9d8a45046219a1d1ffa2c0d49d09fe8bce07ba11f67c301405425921f191756333f80a6e96f4b
|
7
|
+
data.tar.gz: fbb0c6bd32fa340e4bb0bbb50c17a5f8a3265ea37af262679259f32ebe3571bbeb669420eae1ef47d16a7c279ef99059bcc71405b6bfc29d995f2a549d305396
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
## 0.1.0 - 2015-07-14
|
2
|
+
* [enhancement] Add capture option [#11](https://github.com/treasure-data/embulk-parser-query_string/pull/11)
|
3
|
+
|
1
4
|
## 0.0.3 - 2015-07-08
|
2
5
|
|
3
6
|
* [enhancement] Support embulk 0.6.10 (backward compatibility) [#9](https://github.com/treasure-data/embulk-parser-query_string/pull/9)
|
data/README.md
CHANGED
@@ -22,6 +22,7 @@ Required Embulk version >= 0.6.10.
|
|
22
22
|
|
23
23
|
- **strip_quote**: If your file contains quoted lines such as `"foo=FOO&bar=BAR"`, set this to true to strip the quotes. (bool, default: true)
|
24
24
|
- **strip_whitespace**: Strip whitespace before parsing each line so that indented lines such as ' foo=FOO' are parsed correctly. (bool, default: true)
|
25
|
+
- **capture**: Capture the relevant text from each line using a Regexp. The first capture group (a.k.a. `$1`) will be used. See also [partial-config.yml](./partial-config.yml) (string, default: nil)
|
25
26
|
|
26
27
|
## Example
|
27
28
|
|
@@ -1,7 +1,7 @@
|
|
1
1
|
|
2
2
|
Gem::Specification.new do |spec|
|
3
3
|
spec.name = "embulk-parser-query_string"
|
4
|
-
spec.version = "0.0.3"
|
4
|
+
spec.version = "0.1.0"
|
5
5
|
spec.authors = ["yoshihara", "uu59"]
|
6
6
|
spec.summary = "Query String parser plugin for Embulk"
|
7
7
|
spec.description = "Parses Query String files read by other file input plugins."
|
@@ -10,9 +10,11 @@ module Embulk
|
|
10
10
|
def guess_lines(config, sample_lines)
|
11
11
|
return {} unless config.fetch("parser", {}).fetch("type", "query_string") == "query_string"
|
12
12
|
|
13
|
+
parser_config = config.param("parser", :hash)
|
13
14
|
options = {
|
14
|
-
strip_quote:
|
15
|
-
strip_whitespace:
|
15
|
+
strip_quote: parser_config.param("strip_quote", :bool, default: true),
|
16
|
+
strip_whitespace: parser_config.param("strip_whitespace", :bool, default: true),
|
17
|
+
capture: parser_config.param("capture", :string, default: nil)
|
16
18
|
}
|
17
19
|
records = sample_lines.map do |line|
|
18
20
|
Parser::QueryString.parse(line, options) || {}
|
@@ -49,6 +49,10 @@ module Embulk
|
|
49
49
|
end
|
50
50
|
|
51
51
|
def self.parse(line, options = {})
|
52
|
+
if options[:capture]
|
53
|
+
line = line.match(options[:capture]).to_a[1]
|
54
|
+
# TODO: detect incorrect regexp given
|
55
|
+
end
|
52
56
|
line.chomp!
|
53
57
|
line.strip! if options[:strip_whitespace]
|
54
58
|
if options[:strip_quote]
|
data/partial-config.yml
CHANGED
@@ -1,8 +1,9 @@
|
|
1
1
|
in:
|
2
2
|
type: file
|
3
|
-
path_prefix: ./
|
3
|
+
path_prefix: ./target_file
|
4
4
|
parser:
|
5
5
|
strip_quote: true
|
6
6
|
strip_whitespace: true
|
7
|
+
capture: 'www01: (".*?")' # sample regexp to capture query string from such line -> www01: "uid=123&s=foo"
|
7
8
|
exec: {}
|
8
9
|
out: {type: stdout}
|
@@ -21,6 +21,16 @@ module Embulk
|
|
21
21
|
assert_equal(expected, result)
|
22
22
|
end
|
23
23
|
|
24
|
+
def test_with_capture
|
25
|
+
result = QueryString.parse(indented_line, capture: /^ *(.*)$/)
|
26
|
+
assert_equal(expected, result)
|
27
|
+
end
|
28
|
+
|
29
|
+
def test_with_capture_and_quote
|
30
|
+
result = QueryString.parse(complex_line, strip_quote: true, capture: /^[^"]*(".*?")$/)
|
31
|
+
assert_equal(expected, result)
|
32
|
+
end
|
33
|
+
|
24
34
|
def test_with_invalid
|
25
35
|
result = QueryString.parse(invalid_line)
|
26
36
|
assert_nil(result)
|
@@ -44,6 +54,10 @@ module Embulk
|
|
44
54
|
%Q( #{line})
|
45
55
|
end
|
46
56
|
|
57
|
+
def complex_line
|
58
|
+
%Q(Jul 11 11:22:33 ec2-instance-001 : "#{line}")
|
59
|
+
end
|
60
|
+
|
47
61
|
def invalid_line
|
48
62
|
"invalid=www=form"
|
49
63
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-parser-query_string
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshihara
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2015-07-
|
12
|
+
date: 2015-07-14 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|