embulk-parser-query_string 0.0.3 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: fb9918e68aa9b4a3569fefcbb2ffa477aba99378
4
- data.tar.gz: cbf1b7c3d4d14ecc0180c9de70f8e64b59c185b3
3
+ metadata.gz: 482d53ac57ad5d31f0149507090deeb010743c45
4
+ data.tar.gz: 4fac1b68e078caf3f88e763ce7af0deceb0eeff6
5
5
  SHA512:
6
- metadata.gz: c60a005d7be4dd2de37445d1b7e696043824ef69f8c05c4fb90304aea8de14f0a75507031f7667ddacdcf2750806bdd2a657d47ab27a6fbcd384431e939c18fc
7
- data.tar.gz: c433316f33f2b8b3ad29b7ec052f14a393c0101ace98ac103cf0d965a5a3fd43915c1e93d2c0273393f7642f53b8793355c33515923ef6e5dc2c58f7cca92094
6
+ metadata.gz: f06bd83e24c40a98d38d343e4d3af7febfc761f39fab704538b9d8a45046219a1d1ffa2c0d49d09fe8bce07ba11f67c301405425921f191756333f80a6e96f4b
7
+ data.tar.gz: fbb0c6bd32fa340e4bb0bbb50c17a5f8a3265ea37af262679259f32ebe3571bbeb669420eae1ef47d16a7c279ef99059bcc71405b6bfc29d995f2a549d305396
data/CHANGELOG.md CHANGED
@@ -1,3 +1,6 @@
1
+ ## 0.1.0 - 2015-07-14
2
+ * [enhancement] Add capture option [#11](https://github.com/treasure-data/embulk-parser-query_string/pull/11)
3
+
1
4
  ## 0.0.3 - 2015-07-08
2
5
 
3
6
  * [enhancement] Support embulk 0.6.10 (backward compatibility) [#9](https://github.com/treasure-data/embulk-parser-query_string/pull/9)
data/README.md CHANGED
@@ -22,6 +22,7 @@ Required Embulk version >= 0.6.10.
22
22
 
23
23
  - **strip_quote**: If your file contains quoted lines such as `"foo=FOO&bar=BAR"`, set this to true to strip the surrounding quotes. (bool, default: true)
24
24
  - **strip_whitespace**: Strip whitespace before parsing each line so that indented lines such as ' foo=FOO' are parsed correctly. (bool, default: true)
25
+ - **capture**: Capture the relevant text from each line using a regular expression. The first capture group (a.k.a. `$1`) is used as the text to parse. See also [partial-config.yml](./partial-config.yml). (string, default: nil)
25
26
 
26
27
  ## Example
27
28
 
@@ -1,7 +1,7 @@
1
1
 
2
2
  Gem::Specification.new do |spec|
3
3
  spec.name = "embulk-parser-query_string"
4
- spec.version = "0.0.3"
4
+ spec.version = "0.1.0"
5
5
  spec.authors = ["yoshihara", "uu59"]
6
6
  spec.summary = "Query String parser plugin for Embulk"
7
7
  spec.description = "Parses Query String files read by other file input plugins."
@@ -10,9 +10,11 @@ module Embulk
10
10
  def guess_lines(config, sample_lines)
11
11
  return {} unless config.fetch("parser", {}).fetch("type", "query_string") == "query_string"
12
12
 
13
+ parser_config = config.param("parser", :hash)
13
14
  options = {
14
- strip_quote: config.param("strip_quote", :bool, default: true),
15
- strip_whitespace: config.param("strip_whitespace", :bool, default: true)
15
+ strip_quote: parser_config.param("strip_quote", :bool, default: true),
16
+ strip_whitespace: parser_config.param("strip_whitespace", :bool, default: true),
17
+ capture: parser_config.param("capture", :string, default: nil)
16
18
  }
17
19
  records = sample_lines.map do |line|
18
20
  Parser::QueryString.parse(line, options) || {}
@@ -49,6 +49,10 @@ module Embulk
49
49
  end
50
50
 
51
51
  def self.parse(line, options = {})
52
+ if options[:capture]
53
+ line = line.match(options[:capture]).to_a[1]
54
+ # TODO: detect incorrect regexp given
55
+ end
52
56
  line.chomp!
53
57
  line.strip! if options[:strip_whitespace]
54
58
  if options[:strip_quote]
data/partial-config.yml CHANGED
@@ -1,8 +1,9 @@
1
1
  in:
2
2
  type: file
3
- path_prefix: ./file
3
+ path_prefix: ./target_file
4
4
  parser:
5
5
  strip_quote: true
6
6
  strip_whitespace: true
7
+ capture: 'www01: (".*?")' # sample regexp that captures the query string from a line such as: www01: "uid=123&s=foo"
7
8
  exec: {}
8
9
  out: {type: stdout}
@@ -101,9 +101,11 @@ module Embulk
101
101
 
102
102
  def task
103
103
  {
104
- strip_quote: true,
105
- strip_whitespace: true,
106
- schema: columns,
104
+ parser: {
105
+ strip_quote: true,
106
+ strip_whitespace: true,
107
+ schema: columns,
108
+ }
107
109
  }
108
110
  end
109
111
 
@@ -21,6 +21,16 @@ module Embulk
21
21
  assert_equal(expected, result)
22
22
  end
23
23
 
24
+ def test_with_capture
25
+ result = QueryString.parse(indented_line, capture: /^ *(.*)$/)
26
+ assert_equal(expected, result)
27
+ end
28
+
29
+ def test_with_capture_and_quote
30
+ result = QueryString.parse(complex_line, strip_quote: true, capture: /^[^"]*(".*?")$/)
31
+ assert_equal(expected, result)
32
+ end
33
+
24
34
  def test_with_invalid
25
35
  result = QueryString.parse(invalid_line)
26
36
  assert_nil(result)
@@ -44,6 +54,10 @@ module Embulk
44
54
  %Q( #{line})
45
55
  end
46
56
 
57
+ def complex_line
58
+ %Q(Jul 11 11:22:33 ec2-instance-001 : "#{line}")
59
+ end
60
+
47
61
  def invalid_line
48
62
  "invalid=www=form"
49
63
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-parser-query_string
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshihara
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2015-07-08 00:00:00.000000000 Z
12
+ date: 2015-07-14 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  requirement: !ruby/object:Gem::Requirement