embulk-parser-apache-log 0.1.0 → 0.1.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Binary file
@@ -1,6 +1,5 @@
1
- #Wed Feb 04 13:46:12 PST 2015
2
1
  distributionBase=GRADLE_USER_HOME
3
2
  distributionPath=wrapper/dists
4
3
  zipStoreBase=GRADLE_USER_HOME
5
4
  zipStorePath=wrapper/dists
6
- distributionUrl=https\://services.gradle.org/distributions/gradle-2.2.1-bin.zip
5
+ distributionUrl=https\://services.gradle.org/distributions/gradle-4.1-bin.zip
data/gradlew CHANGED
@@ -1,4 +1,4 @@
1
- #!/usr/bin/env bash
1
+ #!/usr/bin/env sh
2
2
 
3
3
  ##############################################################################
4
4
  ##
@@ -6,20 +6,38 @@
6
6
  ##
7
7
  ##############################################################################
8
8
 
9
- # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
10
- DEFAULT_JVM_OPTS=""
9
+ # Attempt to set APP_HOME
10
+ # Resolve links: $0 may be a link
11
+ PRG="$0"
12
+ # Need this for relative symlinks.
13
+ while [ -h "$PRG" ] ; do
14
+ ls=`ls -ld "$PRG"`
15
+ link=`expr "$ls" : '.*-> \(.*\)$'`
16
+ if expr "$link" : '/.*' > /dev/null; then
17
+ PRG="$link"
18
+ else
19
+ PRG=`dirname "$PRG"`"/$link"
20
+ fi
21
+ done
22
+ SAVED="`pwd`"
23
+ cd "`dirname \"$PRG\"`/" >/dev/null
24
+ APP_HOME="`pwd -P`"
25
+ cd "$SAVED" >/dev/null
11
26
 
12
27
  APP_NAME="Gradle"
13
28
  APP_BASE_NAME=`basename "$0"`
14
29
 
30
+ # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
31
+ DEFAULT_JVM_OPTS=""
32
+
15
33
  # Use the maximum available, or set MAX_FD != -1 to use that value.
16
34
  MAX_FD="maximum"
17
35
 
18
- warn ( ) {
36
+ warn () {
19
37
  echo "$*"
20
38
  }
21
39
 
22
- die ( ) {
40
+ die () {
23
41
  echo
24
42
  echo "$*"
25
43
  echo
@@ -30,6 +48,7 @@ die ( ) {
30
48
  cygwin=false
31
49
  msys=false
32
50
  darwin=false
51
+ nonstop=false
33
52
  case "`uname`" in
34
53
  CYGWIN* )
35
54
  cygwin=true
@@ -40,31 +59,11 @@ case "`uname`" in
40
59
  MINGW* )
41
60
  msys=true
42
61
  ;;
62
+ NONSTOP* )
63
+ nonstop=true
64
+ ;;
43
65
  esac
44
66
 
45
- # For Cygwin, ensure paths are in UNIX format before anything is touched.
46
- if $cygwin ; then
47
- [ -n "$JAVA_HOME" ] && JAVA_HOME=`cygpath --unix "$JAVA_HOME"`
48
- fi
49
-
50
- # Attempt to set APP_HOME
51
- # Resolve links: $0 may be a link
52
- PRG="$0"
53
- # Need this for relative symlinks.
54
- while [ -h "$PRG" ] ; do
55
- ls=`ls -ld "$PRG"`
56
- link=`expr "$ls" : '.*-> \(.*\)$'`
57
- if expr "$link" : '/.*' > /dev/null; then
58
- PRG="$link"
59
- else
60
- PRG=`dirname "$PRG"`"/$link"
61
- fi
62
- done
63
- SAVED="`pwd`"
64
- cd "`dirname \"$PRG\"`/" >&-
65
- APP_HOME="`pwd -P`"
66
- cd "$SAVED" >&-
67
-
68
67
  CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
69
68
 
70
69
  # Determine the Java command to use to start the JVM.
@@ -90,7 +89,7 @@ location of your Java installation."
90
89
  fi
91
90
 
92
91
  # Increase the maximum file descriptors if we can.
93
- if [ "$cygwin" = "false" -a "$darwin" = "false" ] ; then
92
+ if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
94
93
  MAX_FD_LIMIT=`ulimit -H -n`
95
94
  if [ $? -eq 0 ] ; then
96
95
  if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
@@ -114,6 +113,7 @@ fi
114
113
  if $cygwin ; then
115
114
  APP_HOME=`cygpath --path --mixed "$APP_HOME"`
116
115
  CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
116
+ JAVACMD=`cygpath --unix "$JAVACMD"`
117
117
 
118
118
  # We build the pattern for arguments to be converted via cygpath
119
119
  ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
@@ -154,11 +154,19 @@ if $cygwin ; then
154
154
  esac
155
155
  fi
156
156
 
157
- # Split up the JVM_OPTS And GRADLE_OPTS values into an array, following the shell quoting and substitution rules
158
- function splitJvmOpts() {
159
- JVM_OPTS=("$@")
157
+ # Escape application args
158
+ save () {
159
+ for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
160
+ echo " "
160
161
  }
161
- eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS
162
- JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME"
162
+ APP_ARGS=$(save "$@")
163
+
164
+ # Collect all arguments for the java command, following the shell quoting and substitution rules
165
+ eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"
166
+
167
+ # by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong
168
+ if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then
169
+ cd "$(dirname "$0")"
170
+ fi
163
171
 
164
- exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@"
172
+ exec "$JAVACMD" "$@"
@@ -51,7 +51,7 @@ public class ApacheLogParserPlugin
51
51
  }
52
52
  }
53
53
  public interface PluginTask
54
- extends Task, LineDecoder.DecoderTask, TimestampParser.ParserTask
54
+ extends Task, LineDecoder.DecoderTask, TimestampParser.Task
55
55
  {
56
56
 
57
57
  @Config("format")
@@ -67,26 +67,29 @@ public class ApacheLogParserPlugin
67
67
  ArrayList<ColumnConfig> columns = new ArrayList<ColumnConfig>();
68
68
  final LogFormat format = task.getFormat();
69
69
 
70
- columns.add(new ColumnConfig("remote_host",STRING ,null));
71
- columns.add(new ColumnConfig("identity_check",STRING ,null));
72
- columns.add(new ColumnConfig("user",STRING ,null));
73
- columns.add(new ColumnConfig("datetime",TIMESTAMP,null));
74
- columns.add(new ColumnConfig("method",STRING ,null));
75
- columns.add(new ColumnConfig("path",STRING ,null));
76
- columns.add(new ColumnConfig("protocol",STRING ,null));
77
- columns.add(new ColumnConfig("status",STRING ,null));
78
- columns.add(new ColumnConfig("size",STRING ,null));
70
+ columns.add(new ColumnConfig("remote_host",STRING ,config));
71
+ columns.add(new ColumnConfig("identity_check",STRING ,config));
72
+ columns.add(new ColumnConfig("user",STRING ,config));
73
+ columns.add(new ColumnConfig("datetime",TIMESTAMP,config));
74
+ columns.add(new ColumnConfig("method",STRING ,config));
75
+ columns.add(new ColumnConfig("path",STRING ,config));
76
+ columns.add(new ColumnConfig("protocol",STRING ,config));
77
+ columns.add(new ColumnConfig("status",STRING ,config));
78
+ columns.add(new ColumnConfig("size",STRING ,config));
79
79
 
80
80
  // combined
81
81
  if( format == LogFormat.combined ){
82
- columns.add(new ColumnConfig("referer",STRING ,null));
83
- columns.add(new ColumnConfig("user_agent",STRING ,null));
82
+ columns.add(new ColumnConfig("referer",STRING ,config));
83
+ columns.add(new ColumnConfig("user_agent",STRING ,config));
84
84
  }
85
85
 
86
86
  Schema schema = new SchemaConfig(columns).toSchema();
87
87
  control.run(task.dump(), schema);
88
88
  }
89
89
 
90
+ private static interface ParserIntlTask extends Task, TimestampParser.Task {}
91
+ private static interface ParserIntlColumnOption extends Task, TimestampParser.TimestampColumnOption {}
92
+
90
93
  @Override
91
94
  public void run(TaskSource taskSource, Schema schema,
92
95
  FileInput input, PageOutput output)
@@ -101,7 +104,14 @@ public class ApacheLogParserPlugin
101
104
  Pattern.CASE_INSENSITIVE
102
105
  | Pattern.DOTALL);
103
106
  Matcher accessLogEntryMatcher;
104
- final TimestampParser time_parser = new TimestampParser("%d/%b/%Y:%T %z",task);
107
+ // TODO: Switch to a newer TimestampParser constructor after a reasonable interval.
108
+ // Traditional constructor is used here for compatibility.
109
+ final ConfigSource configSource = Exec.newConfigSource();
110
+ configSource.set("format", "%d/%b/%Y:%T %z");
111
+ configSource.set("timezone", task.getDefaultTimeZone());
112
+ final TimestampParser time_parser = new TimestampParser(
113
+ Exec.newConfigSource().loadConfig(ParserIntlTask.class),
114
+ configSource.loadConfig(ParserIntlColumnOption.class));
105
115
 
106
116
  while( input.nextFile() ){
107
117
  while(true){
@@ -144,17 +154,18 @@ public class ApacheLogParserPlugin
144
154
  final String rexa = "(\\d+(?:\\.\\d+){3})"; // an IP address
145
155
  final String rexs = "(\\S+)"; // a single token (no spaces)
146
156
  final String rexdt = "\\[([^\\]]+)\\]"; // something between [ and ]
147
- final String rexstr = "\"([^\"]*?)\""; // a quoted string
157
+ final String rexstr = "\"(.*?)\""; // a quoted string
148
158
  final String rexi = "(\\d+)"; // unsigned integer
149
- final String rexp = "\"(\\S+)\\s(\\S+)\\s(\\S+)\""; // method, path, protocol
159
+ final String rexp = "\"(\\S+)\\s(.*?)\\s(HTTP\\/\\d+\\.\\d+)\""; // method, path, protocol
160
+
150
161
  String rex;
151
162
 
152
163
  if( type == LogFormat.combined ){
153
164
  rex = "^" + String.join( " ", rexa, rexs, rexs, rexdt, rexp,
154
- rexi, rexi, rexstr, rexstr) + "$";
165
+ rexi, rexs, rexstr, rexstr) + "$";
155
166
  } else {
156
167
  rex = "^" + String.join( " ", rexa, rexs, rexs, rexdt, rexp,
157
- rexi, rexi) + "$";
168
+ rexi, rexs) + "$";
158
169
  }
159
170
 
160
171
  return rex;
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-parser-apache-log
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Hiroyuki Sato
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-05-30 00:00:00.000000000 Z
11
+ date: 2017-11-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -50,6 +50,10 @@ files:
50
50
  - LICENSE.txt
51
51
  - README.md
52
52
  - build.gradle
53
+ - config/checkstyle/checkstyle.xml
54
+ - config/checkstyle/default.xml
55
+ - example/config.yml
56
+ - example/sample.log
53
57
  - gradle/wrapper/gradle-wrapper.jar
54
58
  - gradle/wrapper/gradle-wrapper.properties
55
59
  - gradlew
@@ -58,7 +62,7 @@ files:
58
62
  - lib/embulk/parser/apache-log.rb
59
63
  - src/main/java/org/embulk/parser/ApacheLogParserPlugin.java
60
64
  - src/test/java/org/embulk/parser/TestApacheLogParserPlugin.java
61
- - classpath/embulk-parser-apache-log-0.1.0.jar
65
+ - classpath/embulk-parser-apache-log-0.1.1.jar
62
66
  homepage: https://github.com/hiroyuki-sato/embulk-parser-apache-log
63
67
  licenses:
64
68
  - MIT