embulk-parser-squid_clf_log 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: f12d8c5971c2770254ce39cb990e35a444a61fed
4
+ data.tar.gz: 7961699a034dbf0d2012b080c453b3b80f87d1f6
5
+ SHA512:
6
+ metadata.gz: fd3bfc608b7a9c895891377f22a496f0efd094d82f9fadc032cd98aff1d684c1c02e9dd5d04d9bbe81694cb3a1899a14778ccefa620955c9bb61915997ca22c6
7
+ data.tar.gz: 45bda7b8127387f8e453b0a90813cbf70d5c2adf1ec4702bf2cda352921768939cff2a4160aed78e3940b88535c9e8c1eeeefa01b6e909dd34787692b9d02122
@@ -0,0 +1,8 @@
1
+ *~
2
+ /pkg/
3
+ /tmp/
4
+ *.gemspec
5
+ .gradle/
6
+ /classpath/
7
+ build/
8
+ .idea
@@ -0,0 +1,21 @@
1
+
2
+ MIT License
3
+
4
+ Permission is hereby granted, free of charge, to any person obtaining
5
+ a copy of this software and associated documentation files (the
6
+ "Software"), to deal in the Software without restriction, including
7
+ without limitation the rights to use, copy, modify, merge, publish,
8
+ distribute, sublicense, and/or sell copies of the Software, and to
9
+ permit persons to whom the Software is furnished to do so, subject to
10
+ the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be
13
+ included in all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,37 @@
1
+ # Squid "Common/Combined Log Format (CLF)" Log parser plugin for Embulk
2
+
3
+ Embulk parser plugin for Squid CLF log (common/combined).
4
+ - Forked from [embulk-parser-apache-log](https://github.com/hiroyuki-sato/embulk-parser-apache-log) (Author: [Hiroyuki Sato](https://github.com/hiroyuki-sato))
5
+
6
+ ## Overview
7
+
8
+ * **Plugin type**: parser
9
+ * **Guess supported**: no
10
+
11
+ ## Configuration
12
+
13
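+ - **format**: log format, either `common` or `combined` (string, default: `combined`)
+ - **stop_on_invalid_record**: stop with an error when a line cannot be parsed; when `false`, the line is logged and skipped (boolean, default: `true`)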
14
+
15
+ ## Example
16
+
17
+ ```yaml
18
+ in:
19
+ type: any file input plugin type
20
+ parser:
21
+ type: squid_clf_log
22
+ format: common
23
+ ```
24
+
25
+ ## Build
26
+
27
+ ```
28
+ $ cd embulk-parser-squid_clf_log
29
+ $ ./gradlew gem
30
+ ```
31
+
32
+ ## Supported log format
33
+
34
+ ```
35
+ logformat common %>a %[ui %[un [%tl] "%rm %ru HTTP/%rv" %>Hs %<st %Ss:%Sh
36
+ logformat combined %>a %[ui %[un [%tl] "%rm %ru HTTP/%rv" %>Hs %<st "%{Referer}>h" "%{User-Agent}>h" %Ss:%Sh
37
+ ```
@@ -0,0 +1,99 @@
1
+ plugins {
2
+ id "com.jfrog.bintray" version "1.1"
3
+ id "com.github.jruby-gradle.base" version "1.5.0"
4
+ id "java"
5
+ id "checkstyle"
6
+ }
7
+ import com.github.jrubygradle.JRubyExec
8
+ repositories {
9
+ mavenCentral()
10
+ jcenter()
11
+ }
12
+ configurations {
13
+ provided
14
+ }
15
+
16
+ version = "0.2.1"
17
+
18
+ sourceCompatibility = 1.8
19
+ targetCompatibility = 1.8
20
+
21
+ dependencies {
22
+ compile "org.embulk:embulk-core:0.9.23"
23
+ provided "org.embulk:embulk-core:0.9.23"
24
+ // compile "YOUR_JAR_DEPENDENCY_GROUP:YOUR_JAR_DEPENDENCY_MODULE:YOUR_JAR_DEPENDENCY_VERSION"
25
+ testCompile "junit:junit:4.+"
26
+ }
27
+
28
+ task classpath(type: Copy, dependsOn: ["jar"]) {
29
+ doFirst { file("classpath").deleteDir() }
30
+ from (configurations.runtime - configurations.provided + files(jar.archivePath))
31
+ into "classpath"
32
+ }
33
+ clean { delete "classpath" }
34
+
35
+ checkstyle {
36
+ configFile = file("${project.rootDir}/config/checkstyle/checkstyle.xml")
37
+ toolVersion = '6.14.1'
38
+ }
39
+ checkstyleMain {
40
+ configFile = file("${project.rootDir}/config/checkstyle/default.xml")
41
+ ignoreFailures = true
42
+ }
43
+ checkstyleTest {
44
+ configFile = file("${project.rootDir}/config/checkstyle/default.xml")
45
+ ignoreFailures = true
46
+ }
47
+ task checkstyle(type: Checkstyle) {
48
+ classpath = sourceSets.main.output + sourceSets.test.output
49
+ source = sourceSets.main.allJava + sourceSets.test.allJava
50
+ }
51
+
52
+ task gem(type: JRubyExec, dependsOn: ["gemspec", "classpath"]) {
53
+ jrubyArgs "-S"
54
+ script "gem"
55
+ scriptArgs "build", "${project.name}.gemspec"
56
+ doLast { ant.move(file: "${project.name}-${project.version}.gem", todir: "pkg") }
57
+ }
58
+
59
+ task gemPush(type: JRubyExec, dependsOn: ["gem"]) {
60
+ jrubyArgs "-S"
61
+ script "gem"
62
+ scriptArgs "push", "pkg/${project.name}-${project.version}.gem"
63
+ }
64
+
65
+ task "package"(dependsOn: ["gemspec", "classpath"]) {
66
+ doLast {
67
+ println "> Build succeeded."
68
+ println "> You can run embulk with '-L ${file(".").absolutePath}' argument."
69
+ }
70
+ }
71
+
72
+ task gemspec {
73
+ ext.gemspecFile = file("${project.name}.gemspec")
74
+ inputs.file "build.gradle"
75
+ outputs.file gemspecFile
76
+ doLast { gemspecFile.write($/
77
+ Gem::Specification.new do |spec|
78
+ spec.name = "${project.name}"
79
+ spec.version = "${project.version}"
80
+ spec.authors = ["Mitsuki Shirase"]
81
+ spec.summary = %[Squid Clf Log parser plugin for Embulk]
82
+ spec.description = %[Parses Squid Clf Log files read by other file input plugins.]
83
+ spec.email = ["lunatilia@users.noreply.github.com"]
84
+ spec.licenses = ["MIT"]
85
+ spec.homepage = "https://github.com/lunatilia/embulk-parser-squid_clf_log"
86
+
87
+ spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
88
+ spec.test_files = spec.files.grep(%r"^(test|spec)/")
89
+ spec.require_paths = ["lib"]
90
+
91
+ #spec.add_dependency 'YOUR_GEM_DEPENDENCY', ['~> YOUR_GEM_DEPENDENCY_VERSION']
92
+ spec.add_development_dependency 'bundler', ['~> 1.0']
93
+ spec.add_development_dependency 'rake', ['~> 12.0']
94
+ end
95
+ /$)
96
+ }
97
+ }
98
+ clean { delete "${project.name}.gemspec" }
99
+
@@ -0,0 +1,128 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!DOCTYPE module PUBLIC
3
+ "-//Puppy Crawl//DTD Check Configuration 1.3//EN"
4
+ "http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
5
+ <module name="Checker">
6
+ <!-- https://github.com/facebook/presto/blob/master/src/checkstyle/checks.xml -->
7
+ <module name="FileTabCharacter"/>
8
+ <module name="NewlineAtEndOfFile">
9
+ <property name="lineSeparator" value="lf"/>
10
+ </module>
11
+ <module name="RegexpMultiline">
12
+ <property name="format" value="\r"/>
13
+ <property name="message" value="Line contains carriage return"/>
14
+ </module>
15
+ <module name="RegexpMultiline">
16
+ <property name="format" value=" \n"/>
17
+ <property name="message" value="Line has trailing whitespace"/>
18
+ </module>
19
+ <module name="RegexpMultiline">
20
+ <property name="format" value="\{\n\n"/>
21
+ <property name="message" value="Blank line after opening brace"/>
22
+ </module>
23
+ <module name="RegexpMultiline">
24
+ <property name="format" value="\n\n\s*\}"/>
25
+ <property name="message" value="Blank line before closing brace"/>
26
+ </module>
27
+ <module name="RegexpMultiline">
28
+ <property name="format" value="\n\n\n"/>
29
+ <property name="message" value="Multiple consecutive blank lines"/>
30
+ </module>
31
+ <module name="RegexpMultiline">
32
+ <property name="format" value="\n\n\Z"/>
33
+ <property name="message" value="Blank line before end of file"/>
34
+ </module>
35
+ <module name="RegexpMultiline">
36
+ <property name="format" value="Preconditions\.checkNotNull"/>
37
+ <property name="message" value="Use of checkNotNull"/>
38
+ </module>
39
+
40
+ <module name="TreeWalker">
41
+ <module name="EmptyBlock">
42
+ <property name="option" value="text"/>
43
+ <property name="tokens" value="
44
+ LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_IF,
45
+ LITERAL_FOR, LITERAL_TRY, LITERAL_WHILE, INSTANCE_INIT, STATIC_INIT"/>
46
+ </module>
47
+ <module name="EmptyStatement"/>
48
+ <module name="EmptyForInitializerPad"/>
49
+ <module name="EmptyForIteratorPad">
50
+ <property name="option" value="space"/>
51
+ </module>
52
+ <module name="MethodParamPad">
53
+ <property name="allowLineBreaks" value="true"/>
54
+ <property name="option" value="nospace"/>
55
+ </module>
56
+ <module name="ParenPad"/>
57
+ <module name="TypecastParenPad"/>
58
+ <module name="NeedBraces"/>
59
+ <module name="LeftCurly">
60
+ <property name="option" value="nl"/>
61
+ <property name="tokens" value="CLASS_DEF, CTOR_DEF, INTERFACE_DEF, METHOD_DEF"/>
62
+ </module>
63
+ <module name="LeftCurly">
64
+ <property name="option" value="eol"/>
65
+ <property name="tokens" value="
66
+ LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_FOR,
67
+ LITERAL_IF, LITERAL_SWITCH, LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE"/>
68
+ </module>
69
+ <module name="RightCurly">
70
+ <property name="option" value="alone"/>
71
+ </module>
72
+ <module name="GenericWhitespace"/>
73
+ <module name="WhitespaceAfter"/>
74
+ <module name="NoWhitespaceBefore"/>
75
+
76
+ <module name="UpperEll"/>
77
+ <module name="DefaultComesLast"/>
78
+ <module name="ArrayTypeStyle"/>
79
+ <module name="MultipleVariableDeclarations"/>
80
+ <module name="ModifierOrder"/>
81
+ <module name="OneStatementPerLine"/>
82
+ <module name="StringLiteralEquality"/>
83
+ <module name="MutableException"/>
84
+ <module name="EqualsHashCode"/>
85
+ <module name="InnerAssignment"/>
86
+ <module name="InterfaceIsType"/>
87
+ <module name="HideUtilityClassConstructor"/>
88
+
89
+ <module name="MemberName"/>
90
+ <module name="LocalVariableName"/>
91
+ <module name="LocalFinalVariableName"/>
92
+ <module name="TypeName"/>
93
+ <module name="PackageName"/>
94
+ <module name="ParameterName"/>
95
+ <module name="StaticVariableName"/>
96
+ <module name="ClassTypeParameterName">
97
+ <property name="format" value="^[A-Z][0-9]?$"/>
98
+ </module>
99
+ <module name="MethodTypeParameterName">
100
+ <property name="format" value="^[A-Z][0-9]?$"/>
101
+ </module>
102
+
103
+ <module name="AvoidStarImport"/>
104
+ <module name="RedundantImport"/>
105
+ <module name="UnusedImports"/>
106
+ <module name="ImportOrder">
107
+ <property name="groups" value="*,javax,java"/>
108
+ <property name="separated" value="true"/>
109
+ <property name="option" value="bottom"/>
110
+ <property name="sortStaticImportsAlphabetically" value="true"/>
111
+ </module>
112
+
113
+ <module name="WhitespaceAround">
114
+ <property name="allowEmptyConstructors" value="true"/>
115
+ <property name="allowEmptyMethods" value="true"/>
116
+ <property name="ignoreEnhancedForColon" value="false"/>
117
+ <property name="tokens" value="
118
+ ASSIGN, BAND, BAND_ASSIGN, BOR, BOR_ASSIGN, BSR, BSR_ASSIGN,
119
+ BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN, EQUAL, GE, GT, LAND, LE,
120
+ LITERAL_ASSERT, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
121
+ LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
122
+ LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE,
123
+ LOR, LT, MINUS, MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL,
124
+ PLUS, PLUS_ASSIGN, QUESTION, SL, SLIST, SL_ASSIGN, SR, SR_ASSIGN,
125
+ STAR, STAR_ASSIGN, TYPE_EXTENSION_AND"/>
126
+ </module>
127
+ </module>
128
+ </module>
@@ -0,0 +1,108 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!DOCTYPE module PUBLIC
3
+ "-//Puppy Crawl//DTD Check Configuration 1.3//EN"
4
+ "http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
5
+ <!--
6
+ This is a subset of ./checkstyle.xml which allows some loose styles
7
+ -->
8
+ <module name="Checker">
9
+ <module name="FileTabCharacter"/>
10
+ <module name="NewlineAtEndOfFile">
11
+ <property name="lineSeparator" value="lf"/>
12
+ </module>
13
+ <module name="RegexpMultiline">
14
+ <property name="format" value="\r"/>
15
+ <property name="message" value="Line contains carriage return"/>
16
+ </module>
17
+ <module name="RegexpMultiline">
18
+ <property name="format" value=" \n"/>
19
+ <property name="message" value="Line has trailing whitespace"/>
20
+ </module>
21
+ <module name="RegexpMultiline">
22
+ <property name="format" value="\n\n\n"/>
23
+ <property name="message" value="Multiple consecutive blank lines"/>
24
+ </module>
25
+ <module name="RegexpMultiline">
26
+ <property name="format" value="\n\n\Z"/>
27
+ <property name="message" value="Blank line before end of file"/>
28
+ </module>
29
+
30
+ <module name="TreeWalker">
31
+ <module name="EmptyBlock">
32
+ <property name="option" value="text"/>
33
+ <property name="tokens" value="
34
+ LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_IF,
35
+ LITERAL_FOR, LITERAL_TRY, LITERAL_WHILE, INSTANCE_INIT, STATIC_INIT"/>
36
+ </module>
37
+ <module name="EmptyStatement"/>
38
+ <module name="EmptyForInitializerPad"/>
39
+ <module name="EmptyForIteratorPad">
40
+ <property name="option" value="space"/>
41
+ </module>
42
+ <module name="MethodParamPad">
43
+ <property name="allowLineBreaks" value="true"/>
44
+ <property name="option" value="nospace"/>
45
+ </module>
46
+ <module name="ParenPad"/>
47
+ <module name="TypecastParenPad"/>
48
+ <module name="NeedBraces"/>
49
+ <module name="LeftCurly">
50
+ <property name="option" value="nl"/>
51
+ <property name="tokens" value="CLASS_DEF, CTOR_DEF, INTERFACE_DEF, METHOD_DEF"/>
52
+ </module>
53
+ <module name="LeftCurly">
54
+ <property name="option" value="eol"/>
55
+ <property name="tokens" value="
56
+ LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_FOR,
57
+ LITERAL_IF, LITERAL_SWITCH, LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE"/>
58
+ </module>
59
+ <module name="RightCurly">
60
+ <property name="option" value="alone"/>
61
+ </module>
62
+ <module name="GenericWhitespace"/>
63
+ <module name="WhitespaceAfter"/>
64
+ <module name="NoWhitespaceBefore"/>
65
+
66
+ <module name="UpperEll"/>
67
+ <module name="DefaultComesLast"/>
68
+ <module name="ArrayTypeStyle"/>
69
+ <module name="MultipleVariableDeclarations"/>
70
+ <module name="ModifierOrder"/>
71
+ <module name="OneStatementPerLine"/>
72
+ <module name="StringLiteralEquality"/>
73
+ <module name="MutableException"/>
74
+ <module name="EqualsHashCode"/>
75
+ <module name="InnerAssignment"/>
76
+ <module name="InterfaceIsType"/>
77
+ <module name="HideUtilityClassConstructor"/>
78
+
79
+ <module name="MemberName"/>
80
+ <module name="LocalVariableName"/>
81
+ <module name="LocalFinalVariableName"/>
82
+ <module name="TypeName"/>
83
+ <module name="PackageName"/>
84
+ <module name="ParameterName"/>
85
+ <module name="StaticVariableName"/>
86
+ <module name="ClassTypeParameterName">
87
+ <property name="format" value="^[A-Z][0-9]?$"/>
88
+ </module>
89
+ <module name="MethodTypeParameterName">
90
+ <property name="format" value="^[A-Z][0-9]?$"/>
91
+ </module>
92
+
93
+ <module name="WhitespaceAround">
94
+ <property name="allowEmptyConstructors" value="true"/>
95
+ <property name="allowEmptyMethods" value="true"/>
96
+ <property name="ignoreEnhancedForColon" value="false"/>
97
+ <property name="tokens" value="
98
+ ASSIGN, BAND, BAND_ASSIGN, BOR, BOR_ASSIGN, BSR, BSR_ASSIGN,
99
+ BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN, EQUAL, GE, GT, LAND, LE,
100
+ LITERAL_ASSERT, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
101
+ LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
102
+ LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE,
103
+ LOR, LT, MINUS, MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL,
104
+ PLUS, PLUS_ASSIGN, QUESTION, SL, SLIST, SL_ASSIGN, SR, SR_ASSIGN,
105
+ STAR, STAR_ASSIGN, TYPE_EXTENSION_AND"/>
106
+ </module>
107
+ </module>
108
+ </module>
@@ -0,0 +1,5 @@
1
+ distributionBase=GRADLE_USER_HOME
2
+ distributionPath=wrapper/dists
3
+ zipStoreBase=GRADLE_USER_HOME
4
+ zipStorePath=wrapper/dists
5
+ distributionUrl=https\://services.gradle.org/distributions/gradle-4.1-bin.zip
data/gradlew ADDED
@@ -0,0 +1,172 @@
1
+ #!/usr/bin/env sh
2
+
3
+ ##############################################################################
4
+ ##
5
+ ## Gradle start up script for UN*X
6
+ ##
7
+ ##############################################################################
8
+
9
+ # Attempt to set APP_HOME
10
+ # Resolve links: $0 may be a link
11
+ PRG="$0"
12
+ # Need this for relative symlinks.
13
+ while [ -h "$PRG" ] ; do
14
+ ls=`ls -ld "$PRG"`
15
+ link=`expr "$ls" : '.*-> \(.*\)$'`
16
+ if expr "$link" : '/.*' > /dev/null; then
17
+ PRG="$link"
18
+ else
19
+ PRG=`dirname "$PRG"`"/$link"
20
+ fi
21
+ done
22
+ SAVED="`pwd`"
23
+ cd "`dirname \"$PRG\"`/" >/dev/null
24
+ APP_HOME="`pwd -P`"
25
+ cd "$SAVED" >/dev/null
26
+
27
+ APP_NAME="Gradle"
28
+ APP_BASE_NAME=`basename "$0"`
29
+
30
+ # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
31
+ DEFAULT_JVM_OPTS=""
32
+
33
+ # Use the maximum available, or set MAX_FD != -1 to use that value.
34
+ MAX_FD="maximum"
35
+
36
+ warn () {
37
+ echo "$*"
38
+ }
39
+
40
+ die () {
41
+ echo
42
+ echo "$*"
43
+ echo
44
+ exit 1
45
+ }
46
+
47
+ # OS specific support (must be 'true' or 'false').
48
+ cygwin=false
49
+ msys=false
50
+ darwin=false
51
+ nonstop=false
52
+ case "`uname`" in
53
+ CYGWIN* )
54
+ cygwin=true
55
+ ;;
56
+ Darwin* )
57
+ darwin=true
58
+ ;;
59
+ MINGW* )
60
+ msys=true
61
+ ;;
62
+ NONSTOP* )
63
+ nonstop=true
64
+ ;;
65
+ esac
66
+
67
+ CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
68
+
69
+ # Determine the Java command to use to start the JVM.
70
+ if [ -n "$JAVA_HOME" ] ; then
71
+ if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
72
+ # IBM's JDK on AIX uses strange locations for the executables
73
+ JAVACMD="$JAVA_HOME/jre/sh/java"
74
+ else
75
+ JAVACMD="$JAVA_HOME/bin/java"
76
+ fi
77
+ if [ ! -x "$JAVACMD" ] ; then
78
+ die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
79
+
80
+ Please set the JAVA_HOME variable in your environment to match the
81
+ location of your Java installation."
82
+ fi
83
+ else
84
+ JAVACMD="java"
85
+ which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
86
+
87
+ Please set the JAVA_HOME variable in your environment to match the
88
+ location of your Java installation."
89
+ fi
90
+
91
+ # Increase the maximum file descriptors if we can.
92
+ if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
93
+ MAX_FD_LIMIT=`ulimit -H -n`
94
+ if [ $? -eq 0 ] ; then
95
+ if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
96
+ MAX_FD="$MAX_FD_LIMIT"
97
+ fi
98
+ ulimit -n $MAX_FD
99
+ if [ $? -ne 0 ] ; then
100
+ warn "Could not set maximum file descriptor limit: $MAX_FD"
101
+ fi
102
+ else
103
+ warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
104
+ fi
105
+ fi
106
+
107
+ # For Darwin, add options to specify how the application appears in the dock
108
+ if $darwin; then
109
+ GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
110
+ fi
111
+
112
+ # For Cygwin, switch paths to Windows format before running java
113
+ if $cygwin ; then
114
+ APP_HOME=`cygpath --path --mixed "$APP_HOME"`
115
+ CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
116
+ JAVACMD=`cygpath --unix "$JAVACMD"`
117
+
118
+ # We build the pattern for arguments to be converted via cygpath
119
+ ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
120
+ SEP=""
121
+ for dir in $ROOTDIRSRAW ; do
122
+ ROOTDIRS="$ROOTDIRS$SEP$dir"
123
+ SEP="|"
124
+ done
125
+ OURCYGPATTERN="(^($ROOTDIRS))"
126
+ # Add a user-defined pattern to the cygpath arguments
127
+ if [ "$GRADLE_CYGPATTERN" != "" ] ; then
128
+ OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
129
+ fi
130
+ # Now convert the arguments - kludge to limit ourselves to /bin/sh
131
+ i=0
132
+ for arg in "$@" ; do
133
+ CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
134
+ CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
135
+
136
+ if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
137
+ eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
138
+ else
139
+ eval `echo args$i`="\"$arg\""
140
+ fi
141
+ i=$((i+1))
142
+ done
143
+ case $i in
144
+ (0) set -- ;;
145
+ (1) set -- "$args0" ;;
146
+ (2) set -- "$args0" "$args1" ;;
147
+ (3) set -- "$args0" "$args1" "$args2" ;;
148
+ (4) set -- "$args0" "$args1" "$args2" "$args3" ;;
149
+ (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
150
+ (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
151
+ (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
152
+ (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
153
+ (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
154
+ esac
155
+ fi
156
+
157
+ # Escape application args
158
+ save () {
159
+ for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
160
+ echo " "
161
+ }
162
+ APP_ARGS=$(save "$@")
163
+
164
+ # Collect all arguments for the java command, following the shell quoting and substitution rules
165
+ eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"
166
+
167
+ # by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong
168
+ if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then
169
+ cd "$(dirname "$0")"
170
+ fi
171
+
172
+ exec "$JAVACMD" "$@"
@@ -0,0 +1,84 @@
1
+ @if "%DEBUG%" == "" @echo off
2
+ @rem ##########################################################################
3
+ @rem
4
+ @rem Gradle startup script for Windows
5
+ @rem
6
+ @rem ##########################################################################
7
+
8
+ @rem Set local scope for the variables with windows NT shell
9
+ if "%OS%"=="Windows_NT" setlocal
10
+
11
+ set DIRNAME=%~dp0
12
+ if "%DIRNAME%" == "" set DIRNAME=.
13
+ set APP_BASE_NAME=%~n0
14
+ set APP_HOME=%DIRNAME%
15
+
16
+ @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
17
+ set DEFAULT_JVM_OPTS=
18
+
19
+ @rem Find java.exe
20
+ if defined JAVA_HOME goto findJavaFromJavaHome
21
+
22
+ set JAVA_EXE=java.exe
23
+ %JAVA_EXE% -version >NUL 2>&1
24
+ if "%ERRORLEVEL%" == "0" goto init
25
+
26
+ echo.
27
+ echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
28
+ echo.
29
+ echo Please set the JAVA_HOME variable in your environment to match the
30
+ echo location of your Java installation.
31
+
32
+ goto fail
33
+
34
+ :findJavaFromJavaHome
35
+ set JAVA_HOME=%JAVA_HOME:"=%
36
+ set JAVA_EXE=%JAVA_HOME%/bin/java.exe
37
+
38
+ if exist "%JAVA_EXE%" goto init
39
+
40
+ echo.
41
+ echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
42
+ echo.
43
+ echo Please set the JAVA_HOME variable in your environment to match the
44
+ echo location of your Java installation.
45
+
46
+ goto fail
47
+
48
+ :init
49
+ @rem Get command-line arguments, handling Windows variants
50
+
51
+ if not "%OS%" == "Windows_NT" goto win9xME_args
52
+
53
+ :win9xME_args
54
+ @rem Slurp the command line arguments.
55
+ set CMD_LINE_ARGS=
56
+ set _SKIP=2
57
+
58
+ :win9xME_args_slurp
59
+ if "x%~1" == "x" goto execute
60
+
61
+ set CMD_LINE_ARGS=%*
62
+
63
+ :execute
64
+ @rem Setup the command line
65
+
66
+ set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
67
+
68
+ @rem Execute Gradle
69
+ "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
70
+
71
+ :end
72
+ @rem End local scope for the variables with windows NT shell
73
+ if "%ERRORLEVEL%"=="0" goto mainEnd
74
+
75
+ :fail
76
+ rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
77
+ rem the _cmd.exe /c_ return code!
78
+ if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
79
+ exit /b 1
80
+
81
+ :mainEnd
82
+ if "%OS%"=="Windows_NT" endlocal
83
+
84
+ :omega
@@ -0,0 +1,61 @@
1
+ module Embulk
2
+ module Guess
3
+
4
+ # TODO implement guess plugin to make this command work:
5
+ # $ embulk guess -g "squid_clf_log" partial-config.yml
6
+ #
7
+ # Depending on the file format the plugin uses, you can use choose
8
+ # one of binary guess (GuessPlugin), text guess (TextGuessPlugin),
9
+ # or line guess (LineGuessPlugin).
10
+
11
+ # class SquidClfLog < GuessPlugin
12
+ # Plugin.register_guess("squid_clf_log", self)
13
+ #
14
+ # def guess(config, sample_buffer)
15
+ # if sample_buffer[0,2] == GZIP_HEADER
16
+ # guessed = {}
17
+ # guessed["type"] = "squid_clf_log"
18
+ # guessed["property1"] = "guessed-value"
19
+ # return {"parser" => guessed}
20
+ # else
21
+ # return {}
22
+ # end
23
+ # end
24
+ # end
25
+
26
+ # class SquidClfLog < TextGuessPlugin
27
+ # Plugin.register_guess("squid_clf_log", self)
28
+ #
29
+ # def guess_text(config, sample_text)
30
+ # js = JSON.parse(sample_text) rescue nil
31
+ # if js && js["mykeyword"] == "keyword"
32
+ # guessed = {}
33
+ # guessed["type"] = "squid_clf_log"
34
+ # guessed["property1"] = "guessed-value"
35
+ # return {"parser" => guessed}
36
+ # else
37
+ # return {}
38
+ # end
39
+ # end
40
+ # end
41
+
42
+ # class SquidClfLog < LineGuessPlugin
43
+ # Plugin.register_guess("squid_clf_log", self)
44
+ #
45
+ # def guess_lines(config, sample_lines)
46
+ # all_line_matched = sample_lines.all? do |line|
47
+ # line =~ /mypattern/
48
+ # end
49
+ # if all_line_matched
50
+ # guessed = {}
51
+ # guessed["type"] = "squid_clf_log"
52
+ # guessed["property1"] = "guessed-value"
53
+ # return {"parser" => guessed}
54
+ # else
55
+ # return {}
56
+ # end
57
+ # end
58
+ # end
59
+
60
+ end
61
+ end
@@ -0,0 +1,3 @@
1
+ Embulk::JavaPlugin.register_parser(
2
+ "squid_clf_log", "org.embulk.parser.squid_clf_log.SquidClfLogParserPlugin",
3
+ File.expand_path('../../../../classpath', __FILE__))
@@ -0,0 +1,196 @@
1
+ package org.embulk.parser.squid_clf_log;
2
+
3
+ import org.embulk.config.Config;
4
+ import org.embulk.config.ConfigDefault;
5
+ import org.embulk.config.ConfigDiff;
6
+ import org.embulk.config.ConfigSource;
7
+ import org.embulk.config.Task;
8
+ import org.embulk.config.TaskSource;
9
+ import org.embulk.spi.ParserPlugin;
10
+ import org.embulk.spi.FileInput;
11
+ import org.embulk.spi.PageOutput;
12
+ import org.embulk.spi.Schema;
13
+ import org.embulk.spi.SchemaConfig;
14
+
15
+ import org.embulk.spi.Exec;
16
+ import org.embulk.spi.PageBuilder;
17
+ import org.embulk.spi.util.LineDecoder;
18
+ import org.embulk.spi.type.TimestampType;
19
+ import org.embulk.spi.time.TimestampParser;
20
+ import org.embulk.spi.time.TimestampParseException;
21
+ import org.embulk.spi.ColumnConfig;
22
+ import java.util.ArrayList;
23
+
24
+ //import static org.embulk.spi.type.Types.BOOLEAN;
25
+ //import static org.embulk.spi.type.Types.DOUBLE;
26
+ //import static org.embulk.spi.type.Types.LONG;
27
+ import static org.embulk.spi.type.Types.STRING;
28
+ import static org.embulk.spi.type.Types.TIMESTAMP;
29
+
30
+ import java.util.regex.Matcher;
31
+ import java.util.regex.Pattern;
32
+
33
+ import com.google.common.base.Throwables;
34
+
35
+ import org.slf4j.Logger;
36
+
37
+ public class SquidClfLogParserPlugin
38
+ implements ParserPlugin
39
+ {
40
+ private static final Logger logger = Exec.getLogger(SquidClfLogParserPlugin.class);
41
+
42
+ public enum LogFormat
43
+ {
44
+ combined("combined"),
45
+ common("common");
46
+ private final String string;
47
+
48
+ private LogFormat(String string)
49
+ {
50
+ this.string = string;
51
+ }
52
+ public String getString()
53
+ {
54
+ return string;
55
+ }
56
+ }
57
+ public interface PluginTask
58
+ extends Task, LineDecoder.DecoderTask, TimestampParser.Task
59
+ {
60
+
61
+ @Config("format")
62
+ @ConfigDefault("\"combined\"")
63
+ public LogFormat getFormat();
64
+
65
+ @Config("stop_on_invalid_record")
66
+ @ConfigDefault("true")
67
+ Boolean getStopOnInvalidRecord();
68
+ }
69
+
70
+ @Override
71
+ public void transaction(ConfigSource config, ParserPlugin.Control control)
72
+ {
73
+ PluginTask task = config.loadConfig(PluginTask.class);
74
+ ArrayList<ColumnConfig> columns = new ArrayList<ColumnConfig>();
75
+ final LogFormat format = task.getFormat();
76
+
77
+ columns.add(new ColumnConfig("client-src-ip-address" ,STRING ,config));
78
+ columns.add(new ColumnConfig("request-username-ident",STRING ,config));
79
+ columns.add(new ColumnConfig("request-username" ,STRING ,config));
80
+ columns.add(new ColumnConfig("request-time" ,TIMESTAMP,config));
81
+ columns.add(new ColumnConfig("request-method" ,STRING ,config));
82
+ columns.add(new ColumnConfig("request-url" ,STRING ,config));
83
+ columns.add(new ColumnConfig("request-protocol" ,STRING ,config));
84
+ columns.add(new ColumnConfig("response-status" ,STRING ,config));
85
+ columns.add(new ColumnConfig("response-bytes" ,STRING ,config));
86
+
87
+ // combined
88
+ if( format == LogFormat.combined ){
89
+ columns.add(new ColumnConfig("referer" ,STRING ,config));
90
+ columns.add(new ColumnConfig("user-agent" ,STRING ,config));
91
+ }
92
+
93
+ // squid status
94
+ columns.add(new ColumnConfig("squid-status" ,STRING ,config));
95
+ columns.add(new ColumnConfig("squid-hierarchy-status",STRING ,config));
96
+
97
+ Schema schema = new SchemaConfig(columns).toSchema();
98
+ control.run(task.dump(), schema);
99
+ }
100
+
101
+ private static interface ParserIntlTask extends Task, TimestampParser.Task {}
102
+ private static interface ParserIntlColumnOption extends Task, TimestampParser.TimestampColumnOption {}
103
+
104
+ /**
+ * Parses every line of every input file as a Squid CLF access-log entry and
+ * emits one record per line that matches the configured format.
+ *
+ * A line that does not match the access-log regex, or whose timestamp cannot
+ * be parsed, aborts the run when stop_on_invalid_record is true; otherwise it
+ * is logged and skipped.
+ *
+ * @param taskSource serialized PluginTask produced by transaction()
+ * @param schema output schema the page builder writes against
+ * @param input file input that supplies the raw log lines
+ * @param output destination of the built pages
+ */
+ @Override
105
+ public void run(TaskSource taskSource, Schema schema,
106
+ FileInput input, PageOutput output)
107
+ {
108
+ PluginTask task = taskSource.loadTask(PluginTask.class);
109
+ LineDecoder lineDecoder = new LineDecoder(input,task);
110
+ PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), schema, output);
111
+ String line = null;
112
+ final LogFormat format = task.getFormat();
113
+
114
+ Pattern accessLogPattern = Pattern.compile(getAccessLogRegex(format),
115
+ Pattern.CASE_INSENSITIVE
116
+ | Pattern.DOTALL);
117
+ Matcher accessLogEntryMatcher;
118
+ // TODO: Switch to a newer TimestampParser constructor after a reasonable interval.
119
+ // Traditional constructor is used here for compatibility.
120
+ final ConfigSource configSource = Exec.newConfigSource();
121
+ configSource.set("format", "%d/%b/%Y:%T %z");
122
+ configSource.set("timezone", task.getDefaultTimeZone());
123
+ final TimestampParser time_parser = new TimestampParser(
124
+ Exec.newConfigSource().loadConfig(ParserIntlTask.class),
125
+ configSource.loadConfig(ParserIntlColumnOption.class));
126
+
127
+ while( input.nextFile() ){
128
+ while(true){
129
+ line = lineDecoder.poll();
130
+
131
+ // poll() returning null ends the current file
+ if( line == null ){
132
+ break;
133
+ }
134
+ accessLogEntryMatcher = accessLogPattern.matcher(line);
135
+
136
+ if(!accessLogEntryMatcher.matches()){
137
+ if (task.getStopOnInvalidRecord()) {
138
+ // separator added so the offending line is readable in the message
+ throw new RuntimeException("unmatched line: " + line);
139
+ } else {
140
+ logger.info("unable to parse line: " + line);
141
+ continue;
142
+ }
143
+ }
144
+
145
+ pageBuilder.setString(0,accessLogEntryMatcher.group(1));
146
+ pageBuilder.setString(1,accessLogEntryMatcher.group(2));
147
+ pageBuilder.setString(2,accessLogEntryMatcher.group(3));
148
+ try {
149
+ pageBuilder.setTimestamp(3,time_parser.parse(accessLogEntryMatcher.group(4)));
150
+ } catch(TimestampParseException ex) {
151
+ if (task.getStopOnInvalidRecord()) {
152
+ throw Throwables.propagate(ex);
153
+ } else {
154
+ // addRecord() has not been called, so this line produces no record
+ logger.info("unable to parse time from line: " + line);
155
+ continue;
156
+ }
157
+ }
158
+ pageBuilder.setString(4,accessLogEntryMatcher.group(5));
159
+ pageBuilder.setString(5,accessLogEntryMatcher.group(6));
160
+ pageBuilder.setString(6,accessLogEntryMatcher.group(7));
161
+ pageBuilder.setString(7,accessLogEntryMatcher.group(8));
162
+ pageBuilder.setString(8,accessLogEntryMatcher.group(9));
163
+ // Groups 10 and 11 are referer/user-agent for "combined" and the squid
+ // status pair for "common"; either way they fill columns 9 and 10.
+ pageBuilder.setString(9,accessLogEntryMatcher.group(10));
164
+ pageBuilder.setString(10,accessLogEntryMatcher.group(11));
165
+ // Only the combined regex has groups 12 and 13 (the squid status pair).
+ if( format == LogFormat.combined ){
166
+ pageBuilder.setString(11,accessLogEntryMatcher.group(12));
167
+ pageBuilder.setString(12,accessLogEntryMatcher.group(13));
168
+ }
169
+ pageBuilder.addRecord();
170
+ }
171
+ }
172
+ pageBuilder.finish();
173
+ }
174
+
175
+ private String getAccessLogRegex(LogFormat type)
176
+ {
177
+ final String nospace = "(\\S+)"; // a single token (no spaces)
178
+ final String timestamp = "\\[([^\\]]+)\\]"; // something between [ and ]
179
+ final String quotestr = "\"(.*?)\""; // a quoted string
180
+ final String uint = "(\\d+)"; // unsigned integer
181
+ final String query = "\"(\\S+)\\s(.*?)\\s(\\S+{4}\\/\\d+\\.\\d+)\""; // method, path, protocol
182
+ final String sqstat = "(\\S+)\\:(\\S+)"; // squid status
183
+
184
+ String rex;
185
+
186
+ if( type == LogFormat.combined ){
187
+ rex = "^" + String.join( " ", nospace, nospace, nospace, timestamp, query,
188
+ uint, nospace, quotestr, quotestr, sqstat) + "$";
189
+ } else {
190
+ rex = "^" + String.join( " ", nospace, nospace, nospace, timestamp, query,
191
+ uint, nospace, sqstat) + "$";
192
+ }
193
+
194
+ return rex;
195
+ }
196
+ }
@@ -0,0 +1,5 @@
1
+ package org.embulk.parser.squid_clf_log;
2
+
3
// Empty placeholder class: it currently declares no fields, methods, or test cases.
+ public class TestSquidClfLogParserPlugin
4
+ {
5
+ }
metadata ADDED
@@ -0,0 +1,87 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: embulk-parser-squid_clf_log
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.1
5
+ platform: ruby
6
+ authors:
7
+ - Mitsuki Shirase
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2020-03-18 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - "~>"
17
+ - !ruby/object:Gem::Version
18
+ version: '1.0'
19
+ name: bundler
20
+ prerelease: false
21
+ type: :development
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.0'
27
+ - !ruby/object:Gem::Dependency
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - "~>"
31
+ - !ruby/object:Gem::Version
32
+ version: '12.0'
33
+ name: rake
34
+ prerelease: false
35
+ type: :development
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '12.0'
41
+ description: Parses Squid Clf Log files read by other file input plugins.
42
+ email:
43
+ - lunatilia@users.noreply.github.com
44
+ executables: []
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - ".gitignore"
49
+ - LICENSE.txt
50
+ - README.md
51
+ - build.gradle
52
+ - classpath/embulk-parser-squid_clf_log-0.2.1.jar
53
+ - config/checkstyle/checkstyle.xml
54
+ - config/checkstyle/default.xml
55
+ - gradle/wrapper/gradle-wrapper.jar
56
+ - gradle/wrapper/gradle-wrapper.properties
57
+ - gradlew
58
+ - gradlew.bat
59
+ - lib/embulk/guess/squid_clf_log.rb
60
+ - lib/embulk/parser/squid_clf_log.rb
61
+ - src/main/java/org/embulk/parser/squid_clf_log/SquidClfLogParserPlugin.java
62
+ - src/test/java/org/embulk/parser/squid_clf_log/TestSquidClfLogParserPlugin.java
63
+ homepage: https://github.com/lunatilia/embulk-parser-squid_clf_log
64
+ licenses:
65
+ - MIT
66
+ metadata: {}
67
+ post_install_message:
68
+ rdoc_options: []
69
+ require_paths:
70
+ - lib
71
+ required_ruby_version: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ required_rubygems_version: !ruby/object:Gem::Requirement
77
+ requirements:
78
+ - - ">="
79
+ - !ruby/object:Gem::Version
80
+ version: '0'
81
+ requirements: []
82
+ rubyforge_project:
83
+ rubygems_version: 2.6.8
84
+ signing_key:
85
+ specification_version: 4
86
+ summary: Squid Clf Log parser plugin for Embulk
87
+ test_files: []