embulk-parser-regex 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5334bd1ab64c181d169a2eb53c1ae9c0888ee8a2
4
- data.tar.gz: 03e5d5c3d3183d2ce0bd26cd9dfcf1383fb5a5b3
3
+ metadata.gz: 180e9d8ef6ff872d1aa840f937972ba228b2762c
4
+ data.tar.gz: 5ae14fb13a5ac890f8f4c7224cf14fbe660ddb22
5
5
  SHA512:
6
- metadata.gz: 3271593ae11fba49ce1b5171342492249ddfdda1f50c1ad9ca0bed0046fbebdba64efb109cd386bae3d9f3dfb1c24d309fa07d5c32b4a0abdb580810a9f30a1b
7
- data.tar.gz: ebb736e2051d8604e4ec21f9ddcce9aa449dd36e4ac1bf4ccce4a9376220078597a1a4802204213f3c5ede5c9971f39b16df6303c1f00df37f851c634108fcb3
6
+ metadata.gz: bf0f5d97601bd217bc28b955d1046456d9231c9cbf7614c82fe90dfe70fb43716ce09ab3762e0bf2737ffc9a1e6774e23086e7a61c2b5526afc284619df89a0d
7
+ data.tar.gz: 7e1a23835f82c112616998ce5c121ddac25299ac0d91afb934b6a5f8218b295a561a7842446b261a79a83dea9d267dca1d60f827e3a72e99675c824b4ee40a7d
data/.gitignore CHANGED
@@ -6,3 +6,5 @@
6
6
  /classpath/
7
7
  build/
8
8
  .idea
9
+ *.iml
10
+ *.gem
data/README.md CHANGED
@@ -11,7 +11,7 @@ A simple parser Using Regular Expression.
11
11
 
12
12
  - **regex**: regular expression that must use [Named Capturing Group](https://blogs.oracle.com/xuemingshen/entry/named_capturing_group_in_jdk7) (string, required)
13
13
  - **columns**: column definition (list of object)
14
- - **regexName**: 'Named Capturing Group' can only include `[a-zA-Z0-9]`, so alias group name in regex can be specified (string, default: `<name> attr value`)
14
+ - **regex_name**: name of the 'Named Capturing Group' in the regex that this column is read from. Group names can only include `[a-zA-Z0-9]`, so use this to map a group to a differently named column (string, default: the column's `name` value)
15
15
  - **skip_if_unmatch**: if false, a RuntimeException is raised when a line doesn't match the regex. If true, the line is skipped. (boolean, default: `false`)
16
16
 
17
17
  ## Example
@@ -23,7 +23,7 @@ in:
23
23
  type: regex
24
24
  regex: ^(?<remoteHost>[.:0-9]+) (?<identity>\S+) (?<user>\S+) \[(?<datetime>[^\]]*)\] "((?<method>\S+) (?<path>\S+) (?<protocol>HTTP/\d+\.\d+)|-)" (?<status>[0-9]+) (?<size>[0-9]+|-) "(?<referer>[^"]*)" "(?<userAgent>[^"]*)" (?<inByte>[0-9]+) (?<outByte>[0-9]+)$
25
25
  columns:
26
- - {name: remote_host, type: string, regexName: remoteHost}
26
+ - {name: remote_host, type: string, regex_name: remoteHost}
27
27
  - {name: identity, type: string}
28
28
  - {name: user, type: string}
29
29
  - {name: datetime, type: timestamp, format: '%d/%b/%Y:%H:%M:%S %z'}
@@ -33,9 +33,9 @@ in:
33
33
  - {name: status, type: long}
34
34
  - {name: size, type: long}
35
35
  - {name: referer, type: string}
36
- - {name: user_agent, type: string, regexName: userAgent}
37
- - {name: in_byte, type: long, regexName: inByte}
38
- - {name: out_byte, type: long, regexName: outByte}
36
+ - {name: user_agent, type: string, regex_name: userAgent}
37
+ - {name: in_byte, type: long, regex_name: inByte}
38
+ - {name: out_byte, type: long, regex_name: outByte}
39
39
  ```
40
40
 
41
41
  ### Guess
@@ -13,7 +13,7 @@ configurations {
13
13
  provided
14
14
  }
15
15
 
16
- version = "0.1.0"
16
+ version = "0.2.0"
17
17
 
18
18
  dependencies {
19
19
  compile "org.embulk:embulk-core:0.7.4"
@@ -26,12 +26,12 @@ module Embulk
26
26
 
27
27
  def apache_x_forwarded_for
28
28
  RegexApacheLogGuesser.new
29
- .ip_or_minus(:x_forwarded_for, regexName: 'forwardedFor')
29
+ .ip_or_minus(:x_forwarded_for, regex_name: 'forwardedFor')
30
30
  end
31
31
 
32
32
  def apache_common(config, sample_lines)
33
33
  RegexApacheLogGuesser.new
34
- .ip(:remote_host, regexName: 'remoteHost').token(:identity).token(:user)
34
+ .ip(:remote_host, regex_name: 'remoteHost').token(:identity).token(:user)
35
35
  .kakko(:datetime, format: '%d/%b/%Y:%H:%M:%S %z', type: 'timestamp')
36
36
  .method_path_protocol
37
37
  .integer(:status).integer_or_minus(:size)
@@ -39,12 +39,12 @@ module Embulk
39
39
 
40
40
  def apache_combined(config, sample_lines)
41
41
  apache_common(config, sample_lines)
42
- .string(:referer).string(:user_agent, regexName: 'userAgent')
42
+ .string(:referer).string(:user_agent, regex_name: 'userAgent')
43
43
  end
44
44
 
45
45
  def apache_combinedio(config, sample_lines)
46
46
  apache_combined(config, sample_lines)
47
- .integer(:in_byte, regexName: 'inByte').integer(:out_byte, regexName: 'outByte')
47
+ .integer(:in_byte, regex_name: 'inByte').integer(:out_byte, regex_name: 'outByte')
48
48
  end
49
49
  end
50
50
 
@@ -82,49 +82,49 @@ module Embulk
82
82
  end
83
83
 
84
84
  def ip(name, opts={})
85
- @patterns << "(?<#{opts[:regexName] || name}>[.:0-9]+)"
85
+ @patterns << "(?<#{opts[:regex_name] || name}>[.:0-9]+)"
86
86
  @columns << {:name => name, :type => 'string'}.merge(opts)
87
87
  self
88
88
  end
89
89
 
90
90
  def ip_or_minus(name, opts={})
91
- @patterns << "(?<#{opts[:regexName] || name}>[.:0-9]+|-)"
91
+ @patterns << "(?<#{opts[:regex_name] || name}>[.:0-9]+|-)"
92
92
  @columns << {:name => name, :type => 'string'}.merge(opts)
93
93
  self
94
94
  end
95
95
 
96
96
  def token(name, opts={})
97
- @patterns << "(?<#{opts[:regexName] || name}>\\S+)"
97
+ @patterns << "(?<#{opts[:regex_name] || name}>\\S+)"
98
98
  @columns << {:name => name, :type => 'string'}.merge(opts)
99
99
  self
100
100
  end
101
101
 
102
102
  def string(name, opts={})
103
- @patterns << "\"(?<#{opts[:regexName] || name}>[^\"]*)\""
103
+ @patterns << "\"(?<#{opts[:regex_name] || name}>[^\"]*)\""
104
104
  @columns << {:name => name, :type => 'string'}.merge(opts)
105
105
  self
106
106
  end
107
107
 
108
108
  def string_or_minus(name, opts={})
109
- @patterns << "\"(?<#{opts[:regexName] || name}>[^\"]*|-)\""
109
+ @patterns << "\"(?<#{opts[:regex_name] || name}>[^\"]*|-)\""
110
110
  @columns << {:name => name, :type => 'string'}.merge(opts)
111
111
  self
112
112
  end
113
113
 
114
114
  def integer(name, opts={})
115
- @patterns << "(?<#{opts[:regexName] || name}>[0-9]+)"
115
+ @patterns << "(?<#{opts[:regex_name] || name}>[0-9]+)"
116
116
  @columns << {:name => name, :type => 'long'}.merge(opts)
117
117
  self
118
118
  end
119
119
 
120
120
  def integer_or_minus(name, opts={})
121
- @patterns << "(?<#{opts[:regexName] || name}>[0-9]+|-)"
121
+ @patterns << "(?<#{opts[:regex_name] || name}>[0-9]+|-)"
122
122
  @columns << {:name => name, :type => 'long'}.merge(opts)
123
123
  self
124
124
  end
125
125
 
126
126
  def kakko(name, opts={})
127
- @patterns << "\\[(?<#{opts[:regexName] || name}>[^\\]]*)\\]"
127
+ @patterns << "\\[(?<#{opts[:regex_name] || name}>[^\\]]*)\\]"
128
128
  @columns << {:name => name, :type => 'string'}.merge(opts)
129
129
  self
130
130
  end
@@ -7,7 +7,7 @@ in:
7
7
  type: regex
8
8
  regex: ^(?<remoteHost>[.:0-9]+) (?<identity>\S+) (?<user>\S+) \[(?<datetime>[^\]]*)\] "((?<method>\S+) (?<path>\S+) (?<protocol>HTTP/\d+\.\d+)|-)" (?<status>[0-9]+) (?<size>[0-9]+|-) "(?<referer>[^"]*)" "(?<userAgent>[^"]*)" (?<inByte>[0-9]+) (?<outByte>[0-9]+)$
9
9
  columns:
10
- - {name: remote_host, type: string, regexName: remoteHost}
10
+ - {name: remote_host, type: string, regex_name: remoteHost}
11
11
  - {name: identity, type: string}
12
12
  - {name: user, type: string}
13
13
  - {name: datetime, type: timestamp, format: '%d/%b/%Y:%H:%M:%S %z'}
@@ -17,7 +17,7 @@ in:
17
17
  - {name: status, type: long}
18
18
  - {name: size, type: long}
19
19
  - {name: referer, type: string}
20
- - {name: user_agent, type: string, regexName: userAgent}
21
- - {name: in_byte, type: long, regexName: inByte}
22
- - {name: out_byte, type: long, regexName: outByte}
20
+ - {name: user_agent, type: string, regex_name: userAgent}
21
+ - {name: in_byte, type: long, regex_name: inByte}
22
+ - {name: out_byte, type: long, regex_name: outByte}
23
23
  out: {type: stdout}
@@ -7,8 +7,8 @@ in:
7
7
  type: regex
8
8
  regex: ^(?<forwardedFor>[.:0-9]+|-) (?<remoteHost>[.:0-9]+) (?<identity>\S+) (?<user>\S+) \[(?<datetime>[^\]]*)\] "((?<method>\S+) (?<path>\S+) (?<protocol>HTTP/\d+\.\d+)|-)" (?<status>[0-9]+) (?<size>[0-9]+|-) "(?<referer>[^"]*)" "(?<userAgent>[^"]*)"$
9
9
  columns:
10
- - {name: x_forwarded_for, type: string, regexName: forwardedFor}
11
- - {name: remote_host, type: string, regexName: remoteHost}
10
+ - {name: x_forwarded_for, type: string, regex_name: forwardedFor}
11
+ - {name: remote_host, type: string, regex_name: remoteHost}
12
12
  - {name: identity, type: string}
13
13
  - {name: user, type: string}
14
14
  - {name: datetime, type: timestamp, format: '%d/%b/%Y:%H:%M:%S %z'}
@@ -18,5 +18,5 @@ in:
18
18
  - {name: status, type: long}
19
19
  - {name: size, type: long}
20
20
  - {name: referer, type: string}
21
- - {name: user_agent, type: string, regexName: userAgent}
21
+ - {name: user_agent, type: string, regex_name: userAgent}
22
22
  out: {type: stdout}
@@ -70,7 +70,7 @@ public class RegexParserPlugin implements ParserPlugin {
70
70
  // TODO: How to Log?
71
71
  continue;
72
72
  } else {
73
- throw new RuntimeException("Unmatched Line: " + line);
73
+ throw new DataException("Unmatched Line: " + line);
74
74
  }
75
75
  }
76
76
 
@@ -100,7 +100,7 @@ public class RegexParserPlugin implements ParserPlugin {
100
100
  String name = c.getName();
101
101
  Type type = c.getType();
102
102
  Column column = c.toColumn(index);
103
- String regexName = c.getOption().get(String.class, "regexName", name);
103
+ String regexName = c.getOption().get(String.class, "regex_name", name);
104
104
 
105
105
  DefaultValueSetter defaultValue = new NullDefaultValueSetter();
106
106
  DynamicColumnSetter setter;
metadata CHANGED
@@ -1,57 +1,54 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-parser-regex
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ken Morishita
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
  date: 2015-08-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
15
- requirement: !ruby/object:Gem::Requirement
15
+ version_requirements: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - "~>"
17
+ - - ~>
18
18
  - !ruby/object:Gem::Version
19
19
  version: '1.0'
20
- type: :development
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
20
+ requirement: !ruby/object:Gem::Requirement
23
21
  requirements:
24
- - - "~>"
22
+ - - ~>
25
23
  - !ruby/object:Gem::Version
26
24
  version: '1.0'
25
+ prerelease: false
26
+ type: :development
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rake
29
- requirement: !ruby/object:Gem::Requirement
29
+ version_requirements: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - "~>"
31
+ - - ~>
32
32
  - !ruby/object:Gem::Version
33
33
  version: '10.0'
34
- type: :development
35
- prerelease: false
36
- version_requirements: !ruby/object:Gem::Requirement
34
+ requirement: !ruby/object:Gem::Requirement
37
35
  requirements:
38
- - - "~>"
36
+ - - ~>
39
37
  - !ruby/object:Gem::Version
40
38
  version: '10.0'
41
- description: Parses lines using regular-expression in files read by other file input
42
- plugins.
39
+ prerelease: false
40
+ type: :development
41
+ description: Parses lines using regular-expression in files read by other file input plugins.
43
42
  email:
44
43
  - mokemokechicken@gmail.com
45
44
  executables: []
46
45
  extensions: []
47
46
  extra_rdoc_files: []
48
47
  files:
49
- - ".gitignore"
48
+ - .gitignore
50
49
  - LICENSE.txt
51
50
  - README.md
52
51
  - build.gradle
53
- - classpath/embulk-parser-regex-0.1.0.jar
54
- - embulk-parser-regex.iml
55
52
  - gradle/wrapper/gradle-wrapper.jar
56
53
  - gradle/wrapper/gradle-wrapper.properties
57
54
  - gradlew
@@ -68,28 +65,29 @@ files:
68
65
  - sample/simple/data_simple_1.txt
69
66
  - src/main/java/org/embulk/parser/regex/RegexParserPlugin.java
70
67
  - src/test/java/org/embulk/parser/regex/TestRegexParserPlugin.java
68
+ - classpath/embulk-parser-regex-0.2.0.jar
71
69
  homepage: https://github.com/mokemokechicken/embulk-parser-regex
72
70
  licenses:
73
71
  - MIT
74
72
  metadata: {}
75
- post_install_message:
73
+ post_install_message:
76
74
  rdoc_options: []
77
75
  require_paths:
78
76
  - lib
79
77
  required_ruby_version: !ruby/object:Gem::Requirement
80
78
  requirements:
81
- - - ">="
79
+ - - '>='
82
80
  - !ruby/object:Gem::Version
83
81
  version: '0'
84
82
  required_rubygems_version: !ruby/object:Gem::Requirement
85
83
  requirements:
86
- - - ">="
84
+ - - '>='
87
85
  - !ruby/object:Gem::Version
88
86
  version: '0'
89
87
  requirements: []
90
- rubyforge_project:
91
- rubygems_version: 2.2.2
92
- signing_key:
88
+ rubyforge_project:
89
+ rubygems_version: 2.1.9
90
+ signing_key:
93
91
  specification_version: 4
94
92
  summary: Regex parser plugin for Embulk
95
93
  test_files: []
@@ -1,49 +0,0 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <module external.linked.project.id="embulk-parser-regex" external.linked.project.path="$MODULE_DIR$" external.root.project.path="$MODULE_DIR$" external.system.id="GRADLE" external.system.module.group="" external.system.module.version="0.1.0" type="JAVA_MODULE" version="4">
3
- <component name="NewModuleRootManager" inherit-compiler-output="false">
4
- <output url="file://$MODULE_DIR$/build/classes/main" />
5
- <output-test url="file://$MODULE_DIR$/build/classes/test" />
6
- <exclude-output />
7
- <content url="file://$MODULE_DIR$">
8
- <sourceFolder url="file://$MODULE_DIR$/src/main/java" isTestSource="false" />
9
- <sourceFolder url="file://$MODULE_DIR$/src/test/java" isTestSource="true" />
10
- <sourceFolder url="file://$MODULE_DIR$/src/main/resources" type="java-resource" />
11
- <sourceFolder url="file://$MODULE_DIR$/src/test/resources" type="java-test-resource" />
12
- <excludeFolder url="file://$MODULE_DIR$/.gradle" />
13
- <excludeFolder url="file://$MODULE_DIR$/build" />
14
- </content>
15
- <orderEntry type="inheritedJdk" />
16
- <orderEntry type="sourceFolder" forTests="false" />
17
- <orderEntry type="library" name="Gradle: org.embulk:embulk-core:0.7.4" level="project" />
18
- <orderEntry type="library" name="Gradle: com.google.guava:guava:18.0" level="project" />
19
- <orderEntry type="library" name="Gradle: com.google.inject:guice:4.0" level="project" />
20
- <orderEntry type="library" name="Gradle: com.google.inject.extensions:guice-multibindings:4.0" level="project" />
21
- <orderEntry type="library" name="Gradle: javax.inject:javax.inject:1" level="project" />
22
- <orderEntry type="library" name="Gradle: com.fasterxml.jackson.core:jackson-annotations:2.5.3" level="project" />
23
- <orderEntry type="library" name="Gradle: com.fasterxml.jackson.core:jackson-core:2.5.3" level="project" />
24
- <orderEntry type="library" name="Gradle: com.fasterxml.jackson.core:jackson-databind:2.5.3" level="project" />
25
- <orderEntry type="library" name="Gradle: com.fasterxml.jackson.datatype:jackson-datatype-guava:2.5.3" level="project" />
26
- <orderEntry type="library" name="Gradle: com.fasterxml.jackson.datatype:jackson-datatype-joda:2.5.3" level="project" />
27
- <orderEntry type="library" name="Gradle: com.fasterxml.jackson.module:jackson-module-guice:2.5.3" level="project" />
28
- <orderEntry type="library" name="Gradle: ch.qos.logback:logback-classic:1.1.3" level="project" />
29
- <orderEntry type="library" name="Gradle: org.slf4j:slf4j-api:1.7.12" level="project" />
30
- <orderEntry type="library" name="Gradle: org.jruby:jruby-complete:9.0.0.0" level="project" />
31
- <orderEntry type="library" name="Gradle: com.google.code.findbugs:annotations:3.0.0" level="project" />
32
- <orderEntry type="library" name="Gradle: org.yaml:snakeyaml:1.14" level="project" />
33
- <orderEntry type="library" name="Gradle: javax.validation:validation-api:1.1.0.Final" level="project" />
34
- <orderEntry type="library" name="Gradle: org.apache.bval:bval-jsr303:0.5" level="project" />
35
- <orderEntry type="library" name="Gradle: io.airlift:slice:0.9" level="project" />
36
- <orderEntry type="library" name="Gradle: joda-time:joda-time:2.8.1" level="project" />
37
- <orderEntry type="library" name="Gradle: io.netty:netty-buffer:5.0.0.Alpha1" level="project" />
38
- <orderEntry type="library" name="Gradle: org.fusesource.jansi:jansi:1.11" level="project" />
39
- <orderEntry type="library" name="Gradle: com.ibm.icu:icu4j:54.1.1" level="project" />
40
- <orderEntry type="library" name="Gradle: aopalliance:aopalliance:1.0" level="project" />
41
- <orderEntry type="library" name="Gradle: ch.qos.logback:logback-core:1.1.3" level="project" />
42
- <orderEntry type="library" name="Gradle: org.apache.bval:bval-core:0.5" level="project" />
43
- <orderEntry type="library" name="Gradle: org.apache.commons:commons-lang3:3.1" level="project" />
44
- <orderEntry type="library" name="Gradle: io.netty:netty-common:5.0.0.Alpha1" level="project" />
45
- <orderEntry type="library" name="Gradle: commons-beanutils:commons-beanutils-core:1.8.3" level="project" />
46
- <orderEntry type="library" scope="TEST" name="Gradle: junit:junit:4.12" level="project" />
47
- <orderEntry type="library" scope="TEST" name="Gradle: org.hamcrest:hamcrest-core:1.3" level="project" />
48
- </component>
49
- </module>