embulk-parser-xpath2 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3c6ca2b4389dd1a0c784bd26735599b07bca338a
4
- data.tar.gz: f9514cf861f316e940779590896042e8b005ab4a
3
+ metadata.gz: 2aebf3205f36802a6d5240064e69da2e4481ab32
4
+ data.tar.gz: c74a3fb9310df91b0695bc83a2495bbac12860e0
5
5
  SHA512:
6
- metadata.gz: 62cb373a04138dba8e690592f23a15259eb9d95bda20c9a1cffdd3f6e4da5e36e897bd4931258f576dd5e09007e67907e0b92c98df1caba41d6eb4c4ec02ab1b
7
- data.tar.gz: 44c6b99ddbdedbe24d42e4dc9fc2bdf70ee6c14032feeed191a582f190d0e8bfbf5a73236def253d7528a4fc5c7113aa3181bbe636d4c94d545a585a69b646d4
6
+ metadata.gz: e5d7656c75d2c8c7a82d266fa26fba9730f798551d0e573c1924d9ab40d3debc40f3aec0a9219327e23c5e8282be3e9d9b206768b6783d7d95cb137e784984ea
7
+ data.tar.gz: 07d9a54720290242e298724c80743bb75997e6d1149fad566380195c282e6e8860c18a07bf810949ca5e522283d19cabcdc9a2062dffa6272d4d33f78302b40a
data/.gitignore CHANGED
@@ -9,8 +9,7 @@ build/
9
9
  /.settings/
10
10
  /.metadata/
11
11
  .classpath
12
- .project
12
+ project/project
13
13
  /bin/
14
- project
15
14
  *.iml
16
15
  out
data/.travis.yml ADDED
@@ -0,0 +1,21 @@
1
+ language: scala
2
+ scala:
3
+ - 2.12.4
4
+ jdk:
5
+ - oraclejdk8
6
+ script:
7
+ - gradle test
8
+ - gradle gem
9
+
10
+ before_deploy:
11
+ - echo '---' > ~/.gem/credentials
12
+ - echo ':rubygems_api_key:' ${RUBYGEMS_API_KEY} > ~/.gem/credentials
13
+ - chmod 0600 ~/.gem/credentials
14
+
15
+ deploy:
16
+ provider: script
17
+ script:
18
+ - gradle gemPush
19
+ on:
20
+ tags: true
21
+ all_branches: true
data/README.md CHANGED
@@ -1,4 +1,6 @@
1
1
  # Xml parser plugin for Embulk
2
+ [![Gem Version](https://badge.fury.io/rb/embulk-parser-xpath2.svg)](https://badge.fury.io/rb/embulk-parser-xpath2)
3
+ [![Build Status](https://travis-ci.org/maji-KY/embulk-parser-xpath2.svg?branch=develop)](https://travis-ci.org/maji-KY/embulk-parser-xpath2)
2
4
 
3
5
  Embulk parser plugin for parsing xml data by XPath perfectly!
4
6
 
@@ -54,3 +56,9 @@ Then you can fetch entries from the following xml:
54
56
  ```
55
57
  $ ./gradlew gem
56
58
  ```
59
+
60
+ ## Benchmark
61
+
62
+ ```
63
+ $ sbt benchmark/jmh:run
64
+ ```
@@ -0,0 +1,99 @@
1
+ package org.embulk.parser.xpath2
2
+
3
+ import java.io.{InputStream, PipedInputStream, PipedOutputStream}
4
+
5
+ import org.embulk.EmbulkTestRuntime
6
+ import org.embulk.config.TaskSource
7
+ import org.embulk.spi.util.InputStreamFileInput
8
+ import org.embulk.spi.{Exec, Schema}
9
+ import org.openjdk.jmh.annotations.Benchmark
10
+
11
+ import scala.collection.mutable
12
+
13
+ class ParseBenchmark {
14
+ import ParseBenchmark._
15
+
16
+ @Benchmark
17
+ def run(): Unit = {
18
+ Exec.doWith(runtime.getExec, () => {
19
+ val configSource = test.configSource
20
+ val task = configSource.loadConfig(classOf[PluginTask])
21
+
22
+ var schema: Schema = null
23
+
24
+ val plugin = new XPath2ParserPlugin()
25
+ plugin.transaction(configSource, (_: TaskSource, s: Schema) => {schema = s})
26
+
27
+ val result: mutable.Buffer[collection.mutable.Map[String, Any]] = mutable.Buffer()
28
+
29
+ plugin.run(
30
+ task.dump(),
31
+ schema,
32
+ new InputStreamFileInput(Exec.getBufferAllocator(), testDataInput),
33
+ new TestTransactionalPageOutput(schema, result)
34
+ )
35
+
36
+ require(result.size == TestRecordSize)
37
+ })
38
+ }
39
+
40
+ }
41
+
42
+ object ParseBenchmark {
43
+
44
+ val TestRecordSize = 1000
45
+
46
+ val test = new XPath2ParserPluginSpec()
47
+ val runtime = new EmbulkTestRuntime
48
+
49
+ val testDataXmlEntry =
50
+ """ <ns2:entry>
51
+ | <ns2:id>1</ns2:id>
52
+ | <ns2:title>Hello!</ns2:title>
53
+ | <ns2:meta>
54
+ | <ns2:author>maji-KY</ns2:author>
55
+ | </ns2:meta>
56
+ | <ns2:date>20010101</ns2:date>
57
+ | <ns2:dateTime>2000-12-31 15:00:00</ns2:dateTime>
58
+ | <ns2:list>
59
+ | <ns2:value>a</ns2:value>
60
+ | <ns2:value>b</ns2:value>
61
+ | <ns2:value>c</ns2:value>
62
+ | </ns2:list>
63
+ | <ns2:rating by="subscribers">2.5</ns2:rating>
64
+ | <ns2:rating>3.5</ns2:rating>
65
+ | <ns2:released>true</ns2:released>
66
+ | </ns2:entry>
67
+ """.stripMargin.getBytes
68
+
69
+ def testDataInput: InputStream = {
70
+ val header =
71
+ """<?xml version="1.0"?>
72
+ |<ns1:root
73
+ | xmlns:ns1="http://example.com/ns1/"
74
+ | xmlns:ns2="http://example.com/ns2/">
75
+ """.stripMargin.getBytes
76
+ val footer =
77
+ """
78
+ |</ns1:root>""".stripMargin.getBytes
79
+
80
+ val pipedOut = new PipedOutputStream
81
+ val pipedIn = new PipedInputStream(pipedOut)
82
+ new Thread() {
83
+ override def run(): Unit = {
84
+ pipedOut.write(header)
85
+ 1 to TestRecordSize foreach { _ =>
86
+ pipedOut.write(testDataXmlEntry)
87
+ }
88
+ pipedOut.write(footer)
89
+ pipedOut.close()
90
+ }
91
+ }.start()
92
+ pipedIn
93
+ }
94
+
95
+ def main(args: Array[String]): Unit = {
96
+ new ParseBenchmark().run()
97
+ }
98
+
99
+ }
data/build.gradle CHANGED
@@ -13,19 +13,22 @@ configurations {
13
13
  provided
14
14
  }
15
15
 
16
- version = "0.0.3"
16
+ version = "0.0.4"
17
+ ext {
18
+ embulkVersion = "0.8.35"
19
+ }
17
20
 
18
21
  sourceCompatibility = 1.8
19
22
  targetCompatibility = 1.8
20
23
 
21
24
  dependencies {
22
- compile "org.embulk:embulk-core:0.8.32"
23
- provided "org.embulk:embulk-core:0.8.32"
24
- testCompile "org.embulk:embulk-core:0.8.32:tests"
25
- testCompile "org.embulk:embulk-standards:0.8.32"
25
+ compile "org.embulk:embulk-core:${embulkVersion}"
26
+ provided "org.embulk:embulk-core:${embulkVersion}"
27
+ testCompile "org.embulk:embulk-core:${embulkVersion}:tests"
28
+ testCompile "org.embulk:embulk-standards:${embulkVersion}"
26
29
  testCompile "junit:junit:4.+"
27
30
 
28
- compile group: 'org.scala-lang', name: 'scala-library', version: '2.12.3'
31
+ compile group: 'org.scala-lang', name: 'scala-library', version: '2.12.4'
29
32
  testCompile group: 'org.scalatest', name: 'scalatest_2.12', version: '3.0.4'
30
33
 
31
34
  }
@@ -37,6 +40,13 @@ task classpath(type: Copy, dependsOn: ["jar"]) {
37
40
  }
38
41
  clean { delete "classpath" }
39
42
 
43
+ test {
44
+ testLogging {
45
+ events 'failed'
46
+ exceptionFormat 'full'
47
+ }
48
+ }
49
+
40
50
  task gem(type: JRubyExec, dependsOn: ["gemspec", "classpath"]) {
41
51
  jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "build"
42
52
  script "${project.name}.gemspec"
data/build.sbt CHANGED
@@ -1,23 +1,34 @@
1
- lazy val root = (project in file(".")).
2
- settings(
3
- inThisBuild(List(
4
- organization := "com.github.maji-KY",
5
- scalaVersion := "2.12.3",
6
- version := "0.0.1-SNAPSHOT"
7
- )),
8
- name := "embulk-parser-xpath2",
9
- scalacOptions ++= Seq(
10
- "-deprecation",
11
- "-feature",
12
- "-unchecked",
13
- "-Xlint",
14
- "-Ywarn-dead-code",
15
- "-Ywarn-numeric-widen",
16
- "-Ywarn-unused",
17
- "-Ywarn-value-discard"
18
- )
1
+ val embulkVersion = "0.8.35"
2
+
3
+ lazy val commonSettings = Seq(
4
+ organization := "com.github.maji-KY",
5
+ scalaVersion := "2.12.4",
6
+ version := "CANNOT_RELEASE",
7
+ scalacOptions ++= Seq(
8
+ "-deprecation",
9
+ "-feature",
10
+ "-unchecked",
11
+ "-Xlint",
12
+ "-Ywarn-dead-code",
13
+ "-Ywarn-numeric-widen",
14
+ "-Ywarn-unused",
15
+ "-Ywarn-value-discard"
16
+ ),
17
+ resolvers += Resolver.jcenterRepo,
18
+ libraryDependencies ++= Seq(
19
+ "org.embulk" % "embulk-core" % embulkVersion,
20
+ "org.embulk" % "embulk-core" % embulkVersion classifier "tests",
21
+ "junit" % "junit" % "4.+" % "test",
22
+ "org.scalatest" %% "scalatest" % "3.0.4" % "test"
19
23
  )
24
+ )
25
+
26
+ lazy val benchmark = (project in file("benchmark"))
27
+ .aggregate(main)
28
+ .settings(commonSettings)
29
+ .dependsOn(main % "compile->test")
30
+ .enablePlugins(JmhPlugin)
20
31
 
21
- resolvers += Resolver.jcenterRepo
32
+ lazy val main = (project in file("."))
33
+ .settings(commonSettings)
22
34
 
23
- libraryDependencies ++= Dependencies.value
@@ -0,0 +1 @@
1
+ sbt.version=1.0.1
@@ -0,0 +1 @@
1
+ addSbtPlugin("pl.project13.scala" % "sbt-jmh" % "0.2.27")
@@ -9,10 +9,25 @@
9
9
  <ns2:author>maji-KY</ns2:author>
10
10
  </ns2:meta>
11
11
  <ns2:date>20010101</ns2:date>
12
+ <ns2:dateTime>2000-12-31 15:00:00</ns2:dateTime>
12
13
  <ns2:list>
13
14
  <ns2:value>a</ns2:value>
14
15
  <ns2:value>b</ns2:value>
15
16
  <ns2:value>c</ns2:value>
16
17
  </ns2:list>
18
+ <ns2:rating by="subscribers">2.5</ns2:rating>
19
+ <ns2:rating>3.5</ns2:rating>
20
+ <ns2:released>true</ns2:released>
21
+ </ns2:entry>
22
+ <ns2:entry>
23
+ <ns2:id>2</ns2:id>
24
+ <ns2:title>Bonjour!</ns2:title>
25
+ <ns2:meta>
26
+ <ns2:author>maji-KY</ns2:author>
27
+ </ns2:meta>
28
+ <ns2:date>20010101</ns2:date>
29
+ <ns2:list></ns2:list>
30
+ <ns2:rating>3.5</ns2:rating>
31
+ <ns2:released>false</ns2:released>
17
32
  </ns2:entry>
18
33
  </ns1:root>
@@ -23,20 +23,23 @@ class XPath2ParserPluginSpec {
23
23
 
24
24
  val dataPath: String = classOf[XPath2ParserPlugin].getClassLoader.getResource("data.xml").getPath
25
25
 
26
- @Test def test() {
26
+ def configSource: ConfigSource = Exec.newConfigSource()
27
+ .set("in", Map[String, String]("type" -> "file", "path_prefix" -> dataPath).asJava)
28
+ .set("root", "/ns1:root/ns2:entry")
29
+ .set("schema", List[util.Map[String, String]](
30
+ Map("path" -> "ns2:id", "name" -> "id", "type" -> "long").asJava,
31
+ Map("path" -> "ns2:title", "name" -> "title", "type" -> "string").asJava,
32
+ Map("path" -> "ns2:meta/ns2:author", "name" -> "author", "type" -> "string").asJava,
33
+ Map("path" -> "ns2:date", "name" -> "date", "type" -> "timestamp", "format" -> "%Y%m%d", "timezone" -> "Asia/Tokyo").asJava,
34
+ Map("path" -> "ns2:dateTime", "name" -> "date_time", "type" -> "timestamp", "format" -> "%Y-%m-%d %H:%M:%S", "timezone" -> "UTC").asJava,
35
+ Map("path" -> "ns2:list/ns2:value", "name" -> "list", "type" -> "json").asJava,
36
+ Map("path" -> "ns2:rating[@by='subscribers']", "name" -> "rating_sub", "type" -> "double").asJava,
37
+ Map("path" -> "ns2:released", "name" -> "released", "type" -> "boolean").asJava,
38
+ ).asJava)
39
+ .set("namespaces", Map[String, String]("ns1" -> "http://example.com/ns1/", "ns2" -> "http://example.com/ns2/").asJava)
40
+ .set("out", Map[String, String]("type" -> "stdout").asJava)
27
41
 
28
- val configSource: ConfigSource = Exec.newConfigSource()
29
- .set("in", Map[String, String]("type" -> "file", "path_prefix" -> dataPath).asJava)
30
- .set("root", "/ns1:root/ns2:entry")
31
- .set("schema", List[util.Map[String, String]](
32
- Map("path" -> "ns2:id", "name" -> "id", "type" -> "long").asJava,
33
- Map("path" -> "ns2:title", "name" -> "title", "type" -> "string").asJava,
34
- Map("path" -> "ns2:meta/ns2:author", "name" -> "author", "type" -> "string").asJava,
35
- Map("path" -> "ns2:date", "name" -> "date", "type" -> "timestamp", "format" -> "%Y%m%d", "timezone" -> "UTC").asJava,
36
- Map("path" -> "ns2:list/ns2:value", "name" -> "list", "type" -> "json").asJava,
37
- ).asJava)
38
- .set("namespaces", Map[String, String]("ns1" -> "http://example.com/ns1/", "ns2" -> "http://example.com/ns2/").asJava)
39
- .set("out", Map[String, String]("type" -> "stdout").asJava)
42
+ @Test def test() {
40
43
 
41
44
  val task = configSource.loadConfig(classOf[PluginTask])
42
45
 
@@ -51,86 +54,108 @@ class XPath2ParserPluginSpec {
51
54
  task.dump(),
52
55
  schema,
53
56
  new InputStreamFileInput(Exec.getBufferAllocator(), new FileInputStream(new File(dataPath))),
54
- new TransactionalPageOutput() {
55
-
56
- import org.embulk.spi.PageReader
57
-
58
- val reader = new PageReader(schema)
59
-
60
- override def add(page: Page) = {
61
- reader.setPage(page)
62
-
63
- while (reader.nextRecord()) {
64
- val record: collection.mutable.Map[String, Any] = collection.mutable.Map()
65
-
66
- schema.getColumns().asScala.foreach { column =>
67
-
68
- column.visit(new ColumnVisitor() {
69
- override def timestampColumn(column: Column): Unit = {
70
- if (reader.isNull(column)) {
71
- record.put(column.getName, null)
72
- } else {
73
- record.put(column.getName, reader.getTimestamp(column))
74
- }
75
- }
76
-
77
- override def stringColumn(column: Column): Unit = {
78
- if (reader.isNull(column)) {
79
- record.put(column.getName, null)
80
- } else {
81
- record.put(column.getName, reader.getString(column))
82
- }
83
- }
84
-
85
- override def longColumn(column: Column): Unit = {
86
- if (reader.isNull(column)) {
87
- record.put(column.getName, null)
88
- } else {
89
- record.put(column.getName, reader.getLong(column))
90
- }
91
- }
92
-
93
- override def doubleColumn(column: Column): Unit = {
94
- if (reader.isNull(column)) {
95
- record.put(column.getName, null)
96
- } else {
97
- record.put(column.getName, reader.getDouble(column))
98
- }
99
- }
100
-
101
- override def booleanColumn(column: Column): Unit = {
102
- if (reader.isNull(column)) {
103
- record.put(column.getName, null)
104
- } else {
105
- record.put(column.getName, reader.getBoolean(column))
106
- }
107
- }
108
-
109
- override def jsonColumn(column: Column): Unit = {
110
- if (reader.isNull(column)) {
111
- record.put(column.getName, null)
112
- } else {
113
- record.put(column.getName, reader.getJson(column))
114
- }
115
- }
116
- })
117
-
118
-
119
- }
120
- result += record
121
- }
122
- }
123
-
124
- override def commit() = Exec.newTaskReport()
125
- override def abort() = {}
126
- override def finish() = {}
127
- override def close() = {}
128
- }
57
+ new TestTransactionalPageOutput(schema, result)
129
58
  )
130
59
 
131
60
  println(result)
132
61
 
133
- assertEquals(ArrayBuffer(Map("date" -> Timestamp.ofEpochSecond(978307200L), "list" -> new JsonParser().parse("""["a","b","c"]"""), "title" -> "Hello!", "author" -> "maji-KY", "id" -> 1L)), result)
62
+ assertEquals(ArrayBuffer(
63
+ Map(
64
+ "id" -> 1L,
65
+ "title" -> "Hello!",
66
+ "author" -> "maji-KY",
67
+ "date" -> Timestamp.ofEpochSecond(978274800L),
68
+ "date_time" -> Timestamp.ofEpochSecond(978274800L),
69
+ "list" -> new JsonParser().parse("""["a","b","c"]"""),
70
+ "rating_sub" -> 2.5d,
71
+ "released" -> true,
72
+ ),
73
+ Map(
74
+ "id" -> 2L,
75
+ "title" -> "Bonjour!",
76
+ "author" -> "maji-KY",
77
+ "date" -> Timestamp.ofEpochSecond(978274800L),
78
+ "date_time" -> null,
79
+ "list" -> new JsonParser().parse("[]"),
80
+ "rating_sub" -> null,
81
+ "released" -> false,
82
+ )
83
+ ), result)
134
84
  }
135
85
 
136
86
  }
87
+
88
+ class TestTransactionalPageOutput(schema: Schema, result: mutable.Buffer[collection.mutable.Map[String, Any]])
89
+ extends TransactionalPageOutput {
90
+ import org.embulk.spi.PageReader
91
+
92
+ val reader = new PageReader(schema)
93
+
94
+ override def add(page: Page) = {
95
+ reader.setPage(page)
96
+
97
+ while (reader.nextRecord()) {
98
+ val record: collection.mutable.Map[String, Any] = collection.mutable.Map()
99
+
100
+ schema.getColumns().asScala.foreach { column =>
101
+ column.visit(new TestColumnVisitor(reader, record))
102
+ }
103
+ result += record
104
+ }
105
+ }
106
+
107
+ override def commit() = Exec.newTaskReport()
108
+ override def abort() = {}
109
+ override def finish() = {}
110
+ override def close() = {}
111
+ }
112
+
113
+ class TestColumnVisitor(reader: PageReader, record: collection.mutable.Map[String, Any]) extends ColumnVisitor {
114
+ override def timestampColumn(column: Column): Unit = {
115
+ if (reader.isNull(column)) {
116
+ record.put(column.getName, null)
117
+ } else {
118
+ record.put(column.getName, reader.getTimestamp(column))
119
+ }
120
+ }
121
+
122
+ override def stringColumn(column: Column): Unit = {
123
+ if (reader.isNull(column)) {
124
+ record.put(column.getName, null)
125
+ } else {
126
+ record.put(column.getName, reader.getString(column))
127
+ }
128
+ }
129
+
130
+ override def longColumn(column: Column): Unit = {
131
+ if (reader.isNull(column)) {
132
+ record.put(column.getName, null)
133
+ } else {
134
+ record.put(column.getName, reader.getLong(column))
135
+ }
136
+ }
137
+
138
+ override def doubleColumn(column: Column): Unit = {
139
+ if (reader.isNull(column)) {
140
+ record.put(column.getName, null)
141
+ } else {
142
+ record.put(column.getName, reader.getDouble(column))
143
+ }
144
+ }
145
+
146
+ override def booleanColumn(column: Column): Unit = {
147
+ if (reader.isNull(column)) {
148
+ record.put(column.getName, null)
149
+ } else {
150
+ record.put(column.getName, reader.getBoolean(column))
151
+ }
152
+ }
153
+
154
+ override def jsonColumn(column: Column): Unit = {
155
+ if (reader.isNull(column)) {
156
+ record.put(column.getName, null)
157
+ } else {
158
+ record.put(column.getName, reader.getJson(column))
159
+ }
160
+ }
161
+ }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-parser-xpath2
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - maji-KY
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-10-06 00:00:00.000000000 Z
11
+ date: 2017-10-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -46,8 +46,10 @@ extensions: []
46
46
  extra_rdoc_files: []
47
47
  files:
48
48
  - .gitignore
49
+ - .travis.yml
49
50
  - LICENSE
50
51
  - README.md
52
+ - benchmark/src/main/scala/org/embulk/parser/xpath2/ParseBenchmark.scala
51
53
  - build.gradle
52
54
  - build.sbt
53
55
  - gradle/wrapper/gradle-wrapper.jar
@@ -56,6 +58,8 @@ files:
56
58
  - gradlew.bat
57
59
  - lib/embulk/guess/xpath2.rb
58
60
  - lib/embulk/parser/xpath2.rb
61
+ - project/build.properties
62
+ - project/plugins.sbt
59
63
  - src/main/scala/org/embulk/parser/xpath2/LoanPattern.scala
60
64
  - src/main/scala/org/embulk/parser/xpath2/PluginTask.scala
61
65
  - src/main/scala/org/embulk/parser/xpath2/XPath2ParserPlugin.scala
@@ -64,8 +68,8 @@ files:
64
68
  - src/test/resources/data.xml
65
69
  - src/test/scala/org/embulk/parser/xpath2/UnitSpec.scala
66
70
  - src/test/scala/org/embulk/parser/xpath2/XPath2ParserPluginSpec.scala
67
- - classpath/embulk-parser-xpath2-0.0.3.jar
68
- - classpath/scala-library-2.12.3.jar
71
+ - classpath/scala-library-2.12.4.jar
72
+ - classpath/embulk-parser-xpath2-0.0.4.jar
69
73
  homepage: https://github.com/maji-KY/embulk-parser-xpath2
70
74
  licenses:
71
75
  - MIT