embulk-parser-query_string 0.0.3 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: fb9918e68aa9b4a3569fefcbb2ffa477aba99378
4
- data.tar.gz: cbf1b7c3d4d14ecc0180c9de70f8e64b59c185b3
3
+ metadata.gz: 482d53ac57ad5d31f0149507090deeb010743c45
4
+ data.tar.gz: 4fac1b68e078caf3f88e763ce7af0deceb0eeff6
5
5
  SHA512:
6
- metadata.gz: c60a005d7be4dd2de37445d1b7e696043824ef69f8c05c4fb90304aea8de14f0a75507031f7667ddacdcf2750806bdd2a657d47ab27a6fbcd384431e939c18fc
7
- data.tar.gz: c433316f33f2b8b3ad29b7ec052f14a393c0101ace98ac103cf0d965a5a3fd43915c1e93d2c0273393f7642f53b8793355c33515923ef6e5dc2c58f7cca92094
6
+ metadata.gz: f06bd83e24c40a98d38d343e4d3af7febfc761f39fab704538b9d8a45046219a1d1ffa2c0d49d09fe8bce07ba11f67c301405425921f191756333f80a6e96f4b
7
+ data.tar.gz: fbb0c6bd32fa340e4bb0bbb50c17a5f8a3265ea37af262679259f32ebe3571bbeb669420eae1ef47d16a7c279ef99059bcc71405b6bfc29d995f2a549d305396
data/CHANGELOG.md CHANGED
@@ -1,3 +1,6 @@
1
+ ## 0.1.0 - 2015-07-14
2
+ * [enhancement] Add capture option [#11](https://github.com/treasure-data/embulk-parser-query_string/pull/11)
3
+
1
4
  ## 0.0.3 - 2015-07-08
2
5
 
3
6
  * [enhancement] Support embulk 0.6.10 (backward compatibility) [#9](https://github.com/treasure-data/embulk-parser-query_string/pull/9)
data/README.md CHANGED
@@ -22,6 +22,7 @@ Required Embulk version >= 0.6.10.
22
22
 
23
23
  - **strip_quote**: If your file contains quoted lines such as `"foo=FOO&bar=BAR"`, set this to true to strip the quotes. (bool, default: true)
24
24
  - **strip_whitespace**: Strip surrounding whitespace from each line before parsing, so that indented lines such as ' foo=FOO' are parsed correctly. (bool, default: true)
25
+ - **capture**: Capture the relevant text from each line using a Regexp. The first capture group (a.k.a. `$1`) will be used. See also [partial-config.yml](./partial-config.yml). (string, default: nil)
25
26
 
26
27
  ## Example
27
28
 
@@ -1,7 +1,7 @@
1
1
 
2
2
  Gem::Specification.new do |spec|
3
3
  spec.name = "embulk-parser-query_string"
4
- spec.version = "0.0.3"
4
+ spec.version = "0.1.0"
5
5
  spec.authors = ["yoshihara", "uu59"]
6
6
  spec.summary = "Query String parser plugin for Embulk"
7
7
  spec.description = "Parses Query String files read by other file input plugins."
@@ -10,9 +10,11 @@ module Embulk
10
10
  def guess_lines(config, sample_lines)
11
11
  return {} unless config.fetch("parser", {}).fetch("type", "query_string") == "query_string"
12
12
 
13
+ parser_config = config.param("parser", :hash)
13
14
  options = {
14
- strip_quote: config.param("strip_quote", :bool, default: true),
15
- strip_whitespace: config.param("strip_whitespace", :bool, default: true)
15
+ strip_quote: parser_config.param("strip_quote", :bool, default: true),
16
+ strip_whitespace: parser_config.param("strip_whitespace", :bool, default: true),
17
+ capture: parser_config.param("capture", :string, default: nil)
16
18
  }
17
19
  records = sample_lines.map do |line|
18
20
  Parser::QueryString.parse(line, options) || {}
@@ -49,6 +49,10 @@ module Embulk
49
49
  end
50
50
 
51
51
  def self.parse(line, options = {})
52
+ if options[:capture]
53
+ line = line.match(options[:capture]).to_a[1]
54
+ # TODO: detect incorrect regexp given
55
+ end
52
56
  line.chomp!
53
57
  line.strip! if options[:strip_whitespace]
54
58
  if options[:strip_quote]
data/partial-config.yml CHANGED
@@ -1,8 +1,9 @@
1
1
  in:
2
2
  type: file
3
- path_prefix: ./file
3
+ path_prefix: ./target_file
4
4
  parser:
5
5
  strip_quote: true
6
6
  strip_whitespace: true
7
+ capture: 'www01: (".*?")' # sample regexp that captures the query string from a line such as: www01: "uid=123&s=foo"
7
8
  exec: {}
8
9
  out: {type: stdout}
@@ -101,9 +101,11 @@ module Embulk
101
101
 
102
102
  def task
103
103
  {
104
- strip_quote: true,
105
- strip_whitespace: true,
106
- schema: columns,
104
+ parser: {
105
+ strip_quote: true,
106
+ strip_whitespace: true,
107
+ schema: columns,
108
+ }
107
109
  }
108
110
  end
109
111
 
@@ -21,6 +21,16 @@ module Embulk
21
21
  assert_equal(expected, result)
22
22
  end
23
23
 
24
+ def test_with_capture
25
+ result = QueryString.parse(indented_line, capture: /^ *(.*)$/)
26
+ assert_equal(expected, result)
27
+ end
28
+
29
+ def test_with_capture_and_quote
30
+ result = QueryString.parse(complex_line, strip_quote: true, capture: /^[^"]*(".*?")$/)
31
+ assert_equal(expected, result)
32
+ end
33
+
24
34
  def test_with_invalid
25
35
  result = QueryString.parse(invalid_line)
26
36
  assert_nil(result)
@@ -44,6 +54,10 @@ module Embulk
44
54
  %Q( #{line})
45
55
  end
46
56
 
57
+ def complex_line
58
+ %Q(Jul 11 11:22:33 ec2-instance-001 : "#{line}")
59
+ end
60
+
47
61
  def invalid_line
48
62
  "invalid=www=form"
49
63
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-parser-query_string
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshihara
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2015-07-08 00:00:00.000000000 Z
12
+ date: 2015-07-14 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  requirement: !ruby/object:Gem::Requirement