embulk-parser-regex 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5334bd1ab64c181d169a2eb53c1ae9c0888ee8a2
4
- data.tar.gz: 03e5d5c3d3183d2ce0bd26cd9dfcf1383fb5a5b3
3
+ metadata.gz: 180e9d8ef6ff872d1aa840f937972ba228b2762c
4
+ data.tar.gz: 5ae14fb13a5ac890f8f4c7224cf14fbe660ddb22
5
5
  SHA512:
6
- metadata.gz: 3271593ae11fba49ce1b5171342492249ddfdda1f50c1ad9ca0bed0046fbebdba64efb109cd386bae3d9f3dfb1c24d309fa07d5c32b4a0abdb580810a9f30a1b
7
- data.tar.gz: ebb736e2051d8604e4ec21f9ddcce9aa449dd36e4ac1bf4ccce4a9376220078597a1a4802204213f3c5ede5c9971f39b16df6303c1f00df37f851c634108fcb3
6
+ metadata.gz: bf0f5d97601bd217bc28b955d1046456d9231c9cbf7614c82fe90dfe70fb43716ce09ab3762e0bf2737ffc9a1e6774e23086e7a61c2b5526afc284619df89a0d
7
+ data.tar.gz: 7e1a23835f82c112616998ce5c121ddac25299ac0d91afb934b6a5f8218b295a561a7842446b261a79a83dea9d267dca1d60f827e3a72e99675c824b4ee40a7d
data/.gitignore CHANGED
@@ -6,3 +6,5 @@
6
6
  /classpath/
7
7
  build/
8
8
  .idea
9
+ *.iml
10
+ *.gem
data/README.md CHANGED
@@ -11,7 +11,7 @@ A simple parser Using Regular Expression.
11
11
 
12
12
  - **regex**: regular expression that must use [Named Capturing Group](https://blogs.oracle.com/xuemingshen/entry/named_capturing_group_in_jdk7) (string, required)
13
13
  - **columns**: column definition (list of object)
14
- - **regexName**: 'Named Capturing Group' can only include `[a-zA-Z0-9]`, so alias group name in regex can be specified (string, default: `<name> attr value`)
14
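+ - **regex_name**: name of the capturing group in `regex` that supplies this column's value. A 'Named Capturing Group' name can only contain `[a-zA-Z0-9]`, so use this option when the column name differs from the group name (string, default: the column's `name` value)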
15
15
  - **skip_if_unmatch**: if false, a line that doesn't match the regex raises a `DataException`. If true, the line is skipped. (boolean, default: `false`)
16
16
 
17
17
  ## Example
@@ -23,7 +23,7 @@ in:
23
23
  type: regex
24
24
  regex: ^(?<remoteHost>[.:0-9]+) (?<identity>\S+) (?<user>\S+) \[(?<datetime>[^\]]*)\] "((?<method>\S+) (?<path>\S+) (?<protocol>HTTP/\d+\.\d+)|-)" (?<status>[0-9]+) (?<size>[0-9]+|-) "(?<referer>[^"]*)" "(?<userAgent>[^"]*)" (?<inByte>[0-9]+) (?<outByte>[0-9]+)$
25
25
  columns:
26
- - {name: remote_host, type: string, regexName: remoteHost}
26
+ - {name: remote_host, type: string, regex_name: remoteHost}
27
27
  - {name: identity, type: string}
28
28
  - {name: user, type: string}
29
29
  - {name: datetime, type: timestamp, format: '%d/%b/%Y:%H:%M:%S %z'}
@@ -33,9 +33,9 @@ in:
33
33
  - {name: status, type: long}
34
34
  - {name: size, type: long}
35
35
  - {name: referer, type: string}
36
- - {name: user_agent, type: string, regexName: userAgent}
37
- - {name: in_byte, type: long, regexName: inByte}
38
- - {name: out_byte, type: long, regexName: outByte}
36
+ - {name: user_agent, type: string, regex_name: userAgent}
37
+ - {name: in_byte, type: long, regex_name: inByte}
38
+ - {name: out_byte, type: long, regex_name: outByte}
39
39
  ```
40
40
 
41
41
  ### Guess
@@ -13,7 +13,7 @@ configurations {
13
13
  provided
14
14
  }
15
15
 
16
- version = "0.1.0"
16
+ version = "0.2.0"
17
17
 
18
18
  dependencies {
19
19
  compile "org.embulk:embulk-core:0.7.4"
@@ -26,12 +26,12 @@ module Embulk
26
26
 
27
27
  def apache_x_forwarded_for
28
28
  RegexApacheLogGuesser.new
29
- .ip_or_minus(:x_forwarded_for, regexName: 'forwardedFor')
29
+ .ip_or_minus(:x_forwarded_for, regex_name: 'forwardedFor')
30
30
  end
31
31
 
32
32
  def apache_common(config, sample_lines)
33
33
  RegexApacheLogGuesser.new
34
- .ip(:remote_host, regexName: 'remoteHost').token(:identity).token(:user)
34
+ .ip(:remote_host, regex_name: 'remoteHost').token(:identity).token(:user)
35
35
  .kakko(:datetime, format: '%d/%b/%Y:%H:%M:%S %z', type: 'timestamp')
36
36
  .method_path_protocol
37
37
  .integer(:status).integer_or_minus(:size)
@@ -39,12 +39,12 @@ module Embulk
39
39
 
40
40
  def apache_combined(config, sample_lines)
41
41
  apache_common(config, sample_lines)
42
- .string(:referer).string(:user_agent, regexName: 'userAgent')
42
+ .string(:referer).string(:user_agent, regex_name: 'userAgent')
43
43
  end
44
44
 
45
45
  def apache_combinedio(config, sample_lines)
46
46
  apache_combined(config, sample_lines)
47
- .integer(:in_byte, regexName: 'inByte').integer(:out_byte, regexName: 'outByte')
47
+ .integer(:in_byte, regex_name: 'inByte').integer(:out_byte, regex_name: 'outByte')
48
48
  end
49
49
  end
50
50
 
@@ -82,49 +82,49 @@ module Embulk
82
82
  end
83
83
 
84
84
  def ip(name, opts={})
85
- @patterns << "(?<#{opts[:regexName] || name}>[.:0-9]+)"
85
+ @patterns << "(?<#{opts[:regex_name] || name}>[.:0-9]+)"
86
86
  @columns << {:name => name, :type => 'string'}.merge(opts)
87
87
  self
88
88
  end
89
89
 
90
90
  def ip_or_minus(name, opts={})
91
- @patterns << "(?<#{opts[:regexName] || name}>[.:0-9]+|-)"
91
+ @patterns << "(?<#{opts[:regex_name] || name}>[.:0-9]+|-)"
92
92
  @columns << {:name => name, :type => 'string'}.merge(opts)
93
93
  self
94
94
  end
95
95
 
96
96
  def token(name, opts={})
97
- @patterns << "(?<#{opts[:regexName] || name}>\\S+)"
97
+ @patterns << "(?<#{opts[:regex_name] || name}>\\S+)"
98
98
  @columns << {:name => name, :type => 'string'}.merge(opts)
99
99
  self
100
100
  end
101
101
 
102
102
  def string(name, opts={})
103
- @patterns << "\"(?<#{opts[:regexName] || name}>[^\"]*)\""
103
+ @patterns << "\"(?<#{opts[:regex_name] || name}>[^\"]*)\""
104
104
  @columns << {:name => name, :type => 'string'}.merge(opts)
105
105
  self
106
106
  end
107
107
 
108
108
  def string_or_minus(name, opts={})
109
- @patterns << "\"(?<#{opts[:regexName] || name}>[^\"]*|-)\""
109
+ @patterns << "\"(?<#{opts[:regex_name] || name}>[^\"]*|-)\""
110
110
  @columns << {:name => name, :type => 'string'}.merge(opts)
111
111
  self
112
112
  end
113
113
 
114
114
  def integer(name, opts={})
115
- @patterns << "(?<#{opts[:regexName] || name}>[0-9]+)"
115
+ @patterns << "(?<#{opts[:regex_name] || name}>[0-9]+)"
116
116
  @columns << {:name => name, :type => 'long'}.merge(opts)
117
117
  self
118
118
  end
119
119
 
120
120
  def integer_or_minus(name, opts={})
121
- @patterns << "(?<#{opts[:regexName] || name}>[0-9]+|-)"
121
+ @patterns << "(?<#{opts[:regex_name] || name}>[0-9]+|-)"
122
122
  @columns << {:name => name, :type => 'long'}.merge(opts)
123
123
  self
124
124
  end
125
125
 
126
126
  def kakko(name, opts={})
127
- @patterns << "\\[(?<#{opts[:regexName] || name}>[^\\]]*)\\]"
127
+ @patterns << "\\[(?<#{opts[:regex_name] || name}>[^\\]]*)\\]"
128
128
  @columns << {:name => name, :type => 'string'}.merge(opts)
129
129
  self
130
130
  end
@@ -7,7 +7,7 @@ in:
7
7
  type: regex
8
8
  regex: ^(?<remoteHost>[.:0-9]+) (?<identity>\S+) (?<user>\S+) \[(?<datetime>[^\]]*)\] "((?<method>\S+) (?<path>\S+) (?<protocol>HTTP/\d+\.\d+)|-)" (?<status>[0-9]+) (?<size>[0-9]+|-) "(?<referer>[^"]*)" "(?<userAgent>[^"]*)" (?<inByte>[0-9]+) (?<outByte>[0-9]+)$
9
9
  columns:
10
- - {name: remote_host, type: string, regexName: remoteHost}
10
+ - {name: remote_host, type: string, regex_name: remoteHost}
11
11
  - {name: identity, type: string}
12
12
  - {name: user, type: string}
13
13
  - {name: datetime, type: timestamp, format: '%d/%b/%Y:%H:%M:%S %z'}
@@ -17,7 +17,7 @@ in:
17
17
  - {name: status, type: long}
18
18
  - {name: size, type: long}
19
19
  - {name: referer, type: string}
20
- - {name: user_agent, type: string, regexName: userAgent}
21
- - {name: in_byte, type: long, regexName: inByte}
22
- - {name: out_byte, type: long, regexName: outByte}
20
+ - {name: user_agent, type: string, regex_name: userAgent}
21
+ - {name: in_byte, type: long, regex_name: inByte}
22
+ - {name: out_byte, type: long, regex_name: outByte}
23
23
  out: {type: stdout}
@@ -7,8 +7,8 @@ in:
7
7
  type: regex
8
8
  regex: ^(?<forwardedFor>[.:0-9]+|-) (?<remoteHost>[.:0-9]+) (?<identity>\S+) (?<user>\S+) \[(?<datetime>[^\]]*)\] "((?<method>\S+) (?<path>\S+) (?<protocol>HTTP/\d+\.\d+)|-)" (?<status>[0-9]+) (?<size>[0-9]+|-) "(?<referer>[^"]*)" "(?<userAgent>[^"]*)"$
9
9
  columns:
10
- - {name: x_forwarded_for, type: string, regexName: forwardedFor}
11
- - {name: remote_host, type: string, regexName: remoteHost}
10
+ - {name: x_forwarded_for, type: string, regex_name: forwardedFor}
11
+ - {name: remote_host, type: string, regex_name: remoteHost}
12
12
  - {name: identity, type: string}
13
13
  - {name: user, type: string}
14
14
  - {name: datetime, type: timestamp, format: '%d/%b/%Y:%H:%M:%S %z'}
@@ -18,5 +18,5 @@ in:
18
18
  - {name: status, type: long}
19
19
  - {name: size, type: long}
20
20
  - {name: referer, type: string}
21
- - {name: user_agent, type: string, regexName: userAgent}
21
+ - {name: user_agent, type: string, regex_name: userAgent}
22
22
  out: {type: stdout}
@@ -70,7 +70,7 @@ public class RegexParserPlugin implements ParserPlugin {
70
70
  // TODO: How to Log?
71
71
  continue;
72
72
  } else {
73
- throw new RuntimeException("Unmatched Line: " + line);
73
+ throw new DataException("Unmatched Line: " + line);
74
74
  }
75
75
  }
76
76
 
@@ -100,7 +100,7 @@ public class RegexParserPlugin implements ParserPlugin {
100
100
  String name = c.getName();
101
101
  Type type = c.getType();
102
102
  Column column = c.toColumn(index);
103
- String regexName = c.getOption().get(String.class, "regexName", name);
103
+ String regexName = c.getOption().get(String.class, "regex_name", name);
104
104
 
105
105
  DefaultValueSetter defaultValue = new NullDefaultValueSetter();
106
106
  DynamicColumnSetter setter;
metadata CHANGED
@@ -1,57 +1,54 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-parser-regex
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ken Morishita
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
  date: 2015-08-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
15
- requirement: !ruby/object:Gem::Requirement
15
+ version_requirements: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - "~>"
17
+ - - ~>
18
18
  - !ruby/object:Gem::Version
19
19
  version: '1.0'
20
- type: :development
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
20
+ requirement: !ruby/object:Gem::Requirement
23
21
  requirements:
24
- - - "~>"
22
+ - - ~>
25
23
  - !ruby/object:Gem::Version
26
24
  version: '1.0'
25
+ prerelease: false
26
+ type: :development
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rake
29
- requirement: !ruby/object:Gem::Requirement
29
+ version_requirements: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - "~>"
31
+ - - ~>
32
32
  - !ruby/object:Gem::Version
33
33
  version: '10.0'
34
- type: :development
35
- prerelease: false
36
- version_requirements: !ruby/object:Gem::Requirement
34
+ requirement: !ruby/object:Gem::Requirement
37
35
  requirements:
38
- - - "~>"
36
+ - - ~>
39
37
  - !ruby/object:Gem::Version
40
38
  version: '10.0'
41
- description: Parses lines using regular-expression in files read by other file input
42
- plugins.
39
+ prerelease: false
40
+ type: :development
41
+ description: Parses lines using regular-expression in files read by other file input plugins.
43
42
  email:
44
43
  - mokemokechicken@gmail.com
45
44
  executables: []
46
45
  extensions: []
47
46
  extra_rdoc_files: []
48
47
  files:
49
- - ".gitignore"
48
+ - .gitignore
50
49
  - LICENSE.txt
51
50
  - README.md
52
51
  - build.gradle
53
- - classpath/embulk-parser-regex-0.1.0.jar
54
- - embulk-parser-regex.iml
55
52
  - gradle/wrapper/gradle-wrapper.jar
56
53
  - gradle/wrapper/gradle-wrapper.properties
57
54
  - gradlew
@@ -68,28 +65,29 @@ files:
68
65
  - sample/simple/data_simple_1.txt
69
66
  - src/main/java/org/embulk/parser/regex/RegexParserPlugin.java
70
67
  - src/test/java/org/embulk/parser/regex/TestRegexParserPlugin.java
68
+ - classpath/embulk-parser-regex-0.2.0.jar
71
69
  homepage: https://github.com/mokemokechicken/embulk-parser-regex
72
70
  licenses:
73
71
  - MIT
74
72
  metadata: {}
75
- post_install_message:
73
+ post_install_message:
76
74
  rdoc_options: []
77
75
  require_paths:
78
76
  - lib
79
77
  required_ruby_version: !ruby/object:Gem::Requirement
80
78
  requirements:
81
- - - ">="
79
+ - - '>='
82
80
  - !ruby/object:Gem::Version
83
81
  version: '0'
84
82
  required_rubygems_version: !ruby/object:Gem::Requirement
85
83
  requirements:
86
- - - ">="
84
+ - - '>='
87
85
  - !ruby/object:Gem::Version
88
86
  version: '0'
89
87
  requirements: []
90
- rubyforge_project:
91
- rubygems_version: 2.2.2
92
- signing_key:
88
+ rubyforge_project:
89
+ rubygems_version: 2.1.9
90
+ signing_key:
93
91
  specification_version: 4
94
92
  summary: Regex parser plugin for Embulk
95
93
  test_files: []
@@ -1,49 +0,0 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <module external.linked.project.id="embulk-parser-regex" external.linked.project.path="$MODULE_DIR$" external.root.project.path="$MODULE_DIR$" external.system.id="GRADLE" external.system.module.group="" external.system.module.version="0.1.0" type="JAVA_MODULE" version="4">
3
- <component name="NewModuleRootManager" inherit-compiler-output="false">
4
- <output url="file://$MODULE_DIR$/build/classes/main" />
5
- <output-test url="file://$MODULE_DIR$/build/classes/test" />
6
- <exclude-output />
7
- <content url="file://$MODULE_DIR$">
8
- <sourceFolder url="file://$MODULE_DIR$/src/main/java" isTestSource="false" />
9
- <sourceFolder url="file://$MODULE_DIR$/src/test/java" isTestSource="true" />
10
- <sourceFolder url="file://$MODULE_DIR$/src/main/resources" type="java-resource" />
11
- <sourceFolder url="file://$MODULE_DIR$/src/test/resources" type="java-test-resource" />
12
- <excludeFolder url="file://$MODULE_DIR$/.gradle" />
13
- <excludeFolder url="file://$MODULE_DIR$/build" />
14
- </content>
15
- <orderEntry type="inheritedJdk" />
16
- <orderEntry type="sourceFolder" forTests="false" />
17
- <orderEntry type="library" name="Gradle: org.embulk:embulk-core:0.7.4" level="project" />
18
- <orderEntry type="library" name="Gradle: com.google.guava:guava:18.0" level="project" />
19
- <orderEntry type="library" name="Gradle: com.google.inject:guice:4.0" level="project" />
20
- <orderEntry type="library" name="Gradle: com.google.inject.extensions:guice-multibindings:4.0" level="project" />
21
- <orderEntry type="library" name="Gradle: javax.inject:javax.inject:1" level="project" />
22
- <orderEntry type="library" name="Gradle: com.fasterxml.jackson.core:jackson-annotations:2.5.3" level="project" />
23
- <orderEntry type="library" name="Gradle: com.fasterxml.jackson.core:jackson-core:2.5.3" level="project" />
24
- <orderEntry type="library" name="Gradle: com.fasterxml.jackson.core:jackson-databind:2.5.3" level="project" />
25
- <orderEntry type="library" name="Gradle: com.fasterxml.jackson.datatype:jackson-datatype-guava:2.5.3" level="project" />
26
- <orderEntry type="library" name="Gradle: com.fasterxml.jackson.datatype:jackson-datatype-joda:2.5.3" level="project" />
27
- <orderEntry type="library" name="Gradle: com.fasterxml.jackson.module:jackson-module-guice:2.5.3" level="project" />
28
- <orderEntry type="library" name="Gradle: ch.qos.logback:logback-classic:1.1.3" level="project" />
29
- <orderEntry type="library" name="Gradle: org.slf4j:slf4j-api:1.7.12" level="project" />
30
- <orderEntry type="library" name="Gradle: org.jruby:jruby-complete:9.0.0.0" level="project" />
31
- <orderEntry type="library" name="Gradle: com.google.code.findbugs:annotations:3.0.0" level="project" />
32
- <orderEntry type="library" name="Gradle: org.yaml:snakeyaml:1.14" level="project" />
33
- <orderEntry type="library" name="Gradle: javax.validation:validation-api:1.1.0.Final" level="project" />
34
- <orderEntry type="library" name="Gradle: org.apache.bval:bval-jsr303:0.5" level="project" />
35
- <orderEntry type="library" name="Gradle: io.airlift:slice:0.9" level="project" />
36
- <orderEntry type="library" name="Gradle: joda-time:joda-time:2.8.1" level="project" />
37
- <orderEntry type="library" name="Gradle: io.netty:netty-buffer:5.0.0.Alpha1" level="project" />
38
- <orderEntry type="library" name="Gradle: org.fusesource.jansi:jansi:1.11" level="project" />
39
- <orderEntry type="library" name="Gradle: com.ibm.icu:icu4j:54.1.1" level="project" />
40
- <orderEntry type="library" name="Gradle: aopalliance:aopalliance:1.0" level="project" />
41
- <orderEntry type="library" name="Gradle: ch.qos.logback:logback-core:1.1.3" level="project" />
42
- <orderEntry type="library" name="Gradle: org.apache.bval:bval-core:0.5" level="project" />
43
- <orderEntry type="library" name="Gradle: org.apache.commons:commons-lang3:3.1" level="project" />
44
- <orderEntry type="library" name="Gradle: io.netty:netty-common:5.0.0.Alpha1" level="project" />
45
- <orderEntry type="library" name="Gradle: commons-beanutils:commons-beanutils-core:1.8.3" level="project" />
46
- <orderEntry type="library" scope="TEST" name="Gradle: junit:junit:4.12" level="project" />
47
- <orderEntry type="library" scope="TEST" name="Gradle: org.hamcrest:hamcrest-core:1.3" level="project" />
48
- </component>
49
- </module>