embulk-parser-xpath2 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3c6ca2b4389dd1a0c784bd26735599b07bca338a
4
- data.tar.gz: f9514cf861f316e940779590896042e8b005ab4a
3
+ metadata.gz: 2aebf3205f36802a6d5240064e69da2e4481ab32
4
+ data.tar.gz: c74a3fb9310df91b0695bc83a2495bbac12860e0
5
5
  SHA512:
6
- metadata.gz: 62cb373a04138dba8e690592f23a15259eb9d95bda20c9a1cffdd3f6e4da5e36e897bd4931258f576dd5e09007e67907e0b92c98df1caba41d6eb4c4ec02ab1b
7
- data.tar.gz: 44c6b99ddbdedbe24d42e4dc9fc2bdf70ee6c14032feeed191a582f190d0e8bfbf5a73236def253d7528a4fc5c7113aa3181bbe636d4c94d545a585a69b646d4
6
+ metadata.gz: e5d7656c75d2c8c7a82d266fa26fba9730f798551d0e573c1924d9ab40d3debc40f3aec0a9219327e23c5e8282be3e9d9b206768b6783d7d95cb137e784984ea
7
+ data.tar.gz: 07d9a54720290242e298724c80743bb75997e6d1149fad566380195c282e6e8860c18a07bf810949ca5e522283d19cabcdc9a2062dffa6272d4d33f78302b40a
data/.gitignore CHANGED
@@ -9,8 +9,7 @@ build/
9
9
  /.settings/
10
10
  /.metadata/
11
11
  .classpath
12
- .project
12
+ project/project
13
13
  /bin/
14
- project
15
14
  *.iml
16
15
  out
data/.travis.yml ADDED
@@ -0,0 +1,21 @@
1
+ language: scala
2
+ scala:
3
+ - 2.12.4
4
+ jdk:
5
+ - oraclejdk8
6
+ script:
7
+ - gradle test
8
+ - gradle gem
9
+
10
+ before_deploy:
11
+ - echo '---' > ~/.gem/credentials
12
+ - echo ':rubygems_api_key:' ${RUBYGEMS_API_KEY} > ~/.gem/credentials
13
+ - chmod 0600 ~/.gem/credentials
14
+
15
+ deploy:
16
+ provider: script
17
+ script:
18
+ - gradle gemPush
19
+ on:
20
+ tags: true
21
+ all_branches: true
data/README.md CHANGED
@@ -1,4 +1,6 @@
1
1
  # Xml parser plugin for Embulk
2
+ [![Gem Version](https://badge.fury.io/rb/embulk-parser-xpath2.svg)](https://badge.fury.io/rb/embulk-parser-xpath2)
3
+ [![Build Status](https://travis-ci.org/maji-KY/embulk-parser-xpath2.svg?branch=develop)](https://travis-ci.org/maji-KY/embulk-parser-xpath2)
2
4
 
3
5
  Embulk parser plugin for parsing xml data by XPath perfectly!
4
6
 
@@ -54,3 +56,9 @@ Then you can fetch entries from the following xml:
54
56
  ```
55
57
  $ ./gradlew gem
56
58
  ```
59
+
60
+ ## Benchmark
61
+
62
+ ```
63
+ $ sbt benchmark/jmh:run
64
+ ```
@@ -0,0 +1,99 @@
1
+ package org.embulk.parser.xpath2
2
+
3
+ import java.io.{InputStream, PipedInputStream, PipedOutputStream}
4
+
5
+ import org.embulk.EmbulkTestRuntime
6
+ import org.embulk.config.TaskSource
7
+ import org.embulk.spi.util.InputStreamFileInput
8
+ import org.embulk.spi.{Exec, Schema}
9
+ import org.openjdk.jmh.annotations.Benchmark
10
+
11
+ import scala.collection.mutable
12
+
13
+ class ParseBenchmark {
14
+ import ParseBenchmark._
15
+
16
+ @Benchmark
17
+ def run(): Unit = {
18
+ Exec.doWith(runtime.getExec, () => {
19
+ val configSource = test.configSource
20
+ val task = configSource.loadConfig(classOf[PluginTask])
21
+
22
+ var schema: Schema = null
23
+
24
+ val plugin = new XPath2ParserPlugin()
25
+ plugin.transaction(configSource, (_: TaskSource, s: Schema) => {schema = s})
26
+
27
+ val result: mutable.Buffer[collection.mutable.Map[String, Any]] = mutable.Buffer()
28
+
29
+ plugin.run(
30
+ task.dump(),
31
+ schema,
32
+ new InputStreamFileInput(Exec.getBufferAllocator(), testDataInput),
33
+ new TestTransactionalPageOutput(schema, result)
34
+ )
35
+
36
+ require(result.size == TestRecordSize)
37
+ })
38
+ }
39
+
40
+ }
41
+
42
+ object ParseBenchmark {
43
+
44
+ val TestRecordSize = 1000
45
+
46
+ val test = new XPath2ParserPluginSpec()
47
+ val runtime = new EmbulkTestRuntime
48
+
49
+ val testDataXmlEntry =
50
+ """ <ns2:entry>
51
+ | <ns2:id>1</ns2:id>
52
+ | <ns2:title>Hello!</ns2:title>
53
+ | <ns2:meta>
54
+ | <ns2:author>maji-KY</ns2:author>
55
+ | </ns2:meta>
56
+ | <ns2:date>20010101</ns2:date>
57
+ | <ns2:dateTime>2000-12-31 15:00:00</ns2:dateTime>
58
+ | <ns2:list>
59
+ | <ns2:value>a</ns2:value>
60
+ | <ns2:value>b</ns2:value>
61
+ | <ns2:value>c</ns2:value>
62
+ | </ns2:list>
63
+ | <ns2:rating by="subscribers">2.5</ns2:rating>
64
+ | <ns2:rating>3.5</ns2:rating>
65
+ | <ns2:released>true</ns2:released>
66
+ | </ns2:entry>
67
+ """.stripMargin.getBytes
68
+
69
+ def testDataInput: InputStream = {
70
+ val header =
71
+ """<?xml version="1.0"?>
72
+ |<ns1:root
73
+ | xmlns:ns1="http://example.com/ns1/"
74
+ | xmlns:ns2="http://example.com/ns2/">
75
+ """.stripMargin.getBytes
76
+ val footer =
77
+ """
78
+ |</ns1:root>""".stripMargin.getBytes
79
+
80
+ val pipedOut = new PipedOutputStream
81
+ val pipedIn = new PipedInputStream(pipedOut)
82
+ new Thread() {
83
+ override def run(): Unit = {
84
+ pipedOut.write(header)
85
+ 1 to TestRecordSize foreach { _ =>
86
+ pipedOut.write(testDataXmlEntry)
87
+ }
88
+ pipedOut.write(footer)
89
+ pipedOut.close()
90
+ }
91
+ }.start()
92
+ pipedIn
93
+ }
94
+
95
+ def main(args: Array[String]): Unit = {
96
+ new ParseBenchmark().run()
97
+ }
98
+
99
+ }
data/build.gradle CHANGED
@@ -13,19 +13,22 @@ configurations {
13
13
  provided
14
14
  }
15
15
 
16
- version = "0.0.3"
16
+ version = "0.0.4"
17
+ ext {
18
+ embulkVersion = "0.8.35"
19
+ }
17
20
 
18
21
  sourceCompatibility = 1.8
19
22
  targetCompatibility = 1.8
20
23
 
21
24
  dependencies {
22
- compile "org.embulk:embulk-core:0.8.32"
23
- provided "org.embulk:embulk-core:0.8.32"
24
- testCompile "org.embulk:embulk-core:0.8.32:tests"
25
- testCompile "org.embulk:embulk-standards:0.8.32"
25
+ compile "org.embulk:embulk-core:${embulkVersion}"
26
+ provided "org.embulk:embulk-core:${embulkVersion}"
27
+ testCompile "org.embulk:embulk-core:${embulkVersion}:tests"
28
+ testCompile "org.embulk:embulk-standards:${embulkVersion}"
26
29
  testCompile "junit:junit:4.+"
27
30
 
28
- compile group: 'org.scala-lang', name: 'scala-library', version: '2.12.3'
31
+ compile group: 'org.scala-lang', name: 'scala-library', version: '2.12.4'
29
32
  testCompile group: 'org.scalatest', name: 'scalatest_2.12', version: '3.0.4'
30
33
 
31
34
  }
@@ -37,6 +40,13 @@ task classpath(type: Copy, dependsOn: ["jar"]) {
37
40
  }
38
41
  clean { delete "classpath" }
39
42
 
43
+ test {
44
+ testLogging {
45
+ events 'failed'
46
+ exceptionFormat 'full'
47
+ }
48
+ }
49
+
40
50
  task gem(type: JRubyExec, dependsOn: ["gemspec", "classpath"]) {
41
51
  jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "build"
42
52
  script "${project.name}.gemspec"
data/build.sbt CHANGED
@@ -1,23 +1,34 @@
1
- lazy val root = (project in file(".")).
2
- settings(
3
- inThisBuild(List(
4
- organization := "com.github.maji-KY",
5
- scalaVersion := "2.12.3",
6
- version := "0.0.1-SNAPSHOT"
7
- )),
8
- name := "embulk-parser-xpath2",
9
- scalacOptions ++= Seq(
10
- "-deprecation",
11
- "-feature",
12
- "-unchecked",
13
- "-Xlint",
14
- "-Ywarn-dead-code",
15
- "-Ywarn-numeric-widen",
16
- "-Ywarn-unused",
17
- "-Ywarn-value-discard"
18
- )
1
+ val embulkVersion = "0.8.35"
2
+
3
+ lazy val commonSettings = Seq(
4
+ organization := "com.github.maji-KY",
5
+ scalaVersion := "2.12.4",
6
+ version := "CANNOT_RELEASE",
7
+ scalacOptions ++= Seq(
8
+ "-deprecation",
9
+ "-feature",
10
+ "-unchecked",
11
+ "-Xlint",
12
+ "-Ywarn-dead-code",
13
+ "-Ywarn-numeric-widen",
14
+ "-Ywarn-unused",
15
+ "-Ywarn-value-discard"
16
+ ),
17
+ resolvers += Resolver.jcenterRepo,
18
+ libraryDependencies ++= Seq(
19
+ "org.embulk" % "embulk-core" % embulkVersion,
20
+ "org.embulk" % "embulk-core" % embulkVersion classifier "tests",
21
+ "junit" % "junit" % "4.+" % "test",
22
+ "org.scalatest" %% "scalatest" % "3.0.4" % "test"
19
23
  )
24
+ )
25
+
26
+ lazy val benchmark = (project in file("benchmark"))
27
+ .aggregate(main)
28
+ .settings(commonSettings)
29
+ .dependsOn(main % "compile->test")
30
+ .enablePlugins(JmhPlugin)
20
31
 
21
- resolvers += Resolver.jcenterRepo
32
+ lazy val main = (project in file("."))
33
+ .settings(commonSettings)
22
34
 
23
- libraryDependencies ++= Dependencies.value
@@ -0,0 +1 @@
1
+ sbt.version=1.0.1
@@ -0,0 +1 @@
1
+ addSbtPlugin("pl.project13.scala" % "sbt-jmh" % "0.2.27")
@@ -9,10 +9,25 @@
9
9
  <ns2:author>maji-KY</ns2:author>
10
10
  </ns2:meta>
11
11
  <ns2:date>20010101</ns2:date>
12
+ <ns2:dateTime>2000-12-31 15:00:00</ns2:dateTime>
12
13
  <ns2:list>
13
14
  <ns2:value>a</ns2:value>
14
15
  <ns2:value>b</ns2:value>
15
16
  <ns2:value>c</ns2:value>
16
17
  </ns2:list>
18
+ <ns2:rating by="subscribers">2.5</ns2:rating>
19
+ <ns2:rating>3.5</ns2:rating>
20
+ <ns2:released>true</ns2:released>
21
+ </ns2:entry>
22
+ <ns2:entry>
23
+ <ns2:id>2</ns2:id>
24
+ <ns2:title>Bonjour!</ns2:title>
25
+ <ns2:meta>
26
+ <ns2:author>maji-KY</ns2:author>
27
+ </ns2:meta>
28
+ <ns2:date>20010101</ns2:date>
29
+ <ns2:list></ns2:list>
30
+ <ns2:rating>3.5</ns2:rating>
31
+ <ns2:released>false</ns2:released>
17
32
  </ns2:entry>
18
33
  </ns1:root>
@@ -23,20 +23,23 @@ class XPath2ParserPluginSpec {
23
23
 
24
24
  val dataPath: String = classOf[XPath2ParserPlugin].getClassLoader.getResource("data.xml").getPath
25
25
 
26
- @Test def test() {
26
+ def configSource: ConfigSource = Exec.newConfigSource()
27
+ .set("in", Map[String, String]("type" -> "file", "path_prefix" -> dataPath).asJava)
28
+ .set("root", "/ns1:root/ns2:entry")
29
+ .set("schema", List[util.Map[String, String]](
30
+ Map("path" -> "ns2:id", "name" -> "id", "type" -> "long").asJava,
31
+ Map("path" -> "ns2:title", "name" -> "title", "type" -> "string").asJava,
32
+ Map("path" -> "ns2:meta/ns2:author", "name" -> "author", "type" -> "string").asJava,
33
+ Map("path" -> "ns2:date", "name" -> "date", "type" -> "timestamp", "format" -> "%Y%m%d", "timezone" -> "Asia/Tokyo").asJava,
34
+ Map("path" -> "ns2:dateTime", "name" -> "date_time", "type" -> "timestamp", "format" -> "%Y-%m-%d %H:%M:%S", "timezone" -> "UTC").asJava,
35
+ Map("path" -> "ns2:list/ns2:value", "name" -> "list", "type" -> "json").asJava,
36
+ Map("path" -> "ns2:rating[@by='subscribers']", "name" -> "rating_sub", "type" -> "double").asJava,
37
+ Map("path" -> "ns2:released", "name" -> "released", "type" -> "boolean").asJava,
38
+ ).asJava)
39
+ .set("namespaces", Map[String, String]("ns1" -> "http://example.com/ns1/", "ns2" -> "http://example.com/ns2/").asJava)
40
+ .set("out", Map[String, String]("type" -> "stdout").asJava)
27
41
 
28
- val configSource: ConfigSource = Exec.newConfigSource()
29
- .set("in", Map[String, String]("type" -> "file", "path_prefix" -> dataPath).asJava)
30
- .set("root", "/ns1:root/ns2:entry")
31
- .set("schema", List[util.Map[String, String]](
32
- Map("path" -> "ns2:id", "name" -> "id", "type" -> "long").asJava,
33
- Map("path" -> "ns2:title", "name" -> "title", "type" -> "string").asJava,
34
- Map("path" -> "ns2:meta/ns2:author", "name" -> "author", "type" -> "string").asJava,
35
- Map("path" -> "ns2:date", "name" -> "date", "type" -> "timestamp", "format" -> "%Y%m%d", "timezone" -> "UTC").asJava,
36
- Map("path" -> "ns2:list/ns2:value", "name" -> "list", "type" -> "json").asJava,
37
- ).asJava)
38
- .set("namespaces", Map[String, String]("ns1" -> "http://example.com/ns1/", "ns2" -> "http://example.com/ns2/").asJava)
39
- .set("out", Map[String, String]("type" -> "stdout").asJava)
42
+ @Test def test() {
40
43
 
41
44
  val task = configSource.loadConfig(classOf[PluginTask])
42
45
 
@@ -51,86 +54,108 @@ class XPath2ParserPluginSpec {
51
54
  task.dump(),
52
55
  schema,
53
56
  new InputStreamFileInput(Exec.getBufferAllocator(), new FileInputStream(new File(dataPath))),
54
- new TransactionalPageOutput() {
55
-
56
- import org.embulk.spi.PageReader
57
-
58
- val reader = new PageReader(schema)
59
-
60
- override def add(page: Page) = {
61
- reader.setPage(page)
62
-
63
- while (reader.nextRecord()) {
64
- val record: collection.mutable.Map[String, Any] = collection.mutable.Map()
65
-
66
- schema.getColumns().asScala.foreach { column =>
67
-
68
- column.visit(new ColumnVisitor() {
69
- override def timestampColumn(column: Column): Unit = {
70
- if (reader.isNull(column)) {
71
- record.put(column.getName, null)
72
- } else {
73
- record.put(column.getName, reader.getTimestamp(column))
74
- }
75
- }
76
-
77
- override def stringColumn(column: Column): Unit = {
78
- if (reader.isNull(column)) {
79
- record.put(column.getName, null)
80
- } else {
81
- record.put(column.getName, reader.getString(column))
82
- }
83
- }
84
-
85
- override def longColumn(column: Column): Unit = {
86
- if (reader.isNull(column)) {
87
- record.put(column.getName, null)
88
- } else {
89
- record.put(column.getName, reader.getLong(column))
90
- }
91
- }
92
-
93
- override def doubleColumn(column: Column): Unit = {
94
- if (reader.isNull(column)) {
95
- record.put(column.getName, null)
96
- } else {
97
- record.put(column.getName, reader.getDouble(column))
98
- }
99
- }
100
-
101
- override def booleanColumn(column: Column): Unit = {
102
- if (reader.isNull(column)) {
103
- record.put(column.getName, null)
104
- } else {
105
- record.put(column.getName, reader.getBoolean(column))
106
- }
107
- }
108
-
109
- override def jsonColumn(column: Column): Unit = {
110
- if (reader.isNull(column)) {
111
- record.put(column.getName, null)
112
- } else {
113
- record.put(column.getName, reader.getJson(column))
114
- }
115
- }
116
- })
117
-
118
-
119
- }
120
- result += record
121
- }
122
- }
123
-
124
- override def commit() = Exec.newTaskReport()
125
- override def abort() = {}
126
- override def finish() = {}
127
- override def close() = {}
128
- }
57
+ new TestTransactionalPageOutput(schema, result)
129
58
  )
130
59
 
131
60
  println(result)
132
61
 
133
- assertEquals(ArrayBuffer(Map("date" -> Timestamp.ofEpochSecond(978307200L), "list" -> new JsonParser().parse("""["a","b","c"]"""), "title" -> "Hello!", "author" -> "maji-KY", "id" -> 1L)), result)
62
+ assertEquals(ArrayBuffer(
63
+ Map(
64
+ "id" -> 1L,
65
+ "title" -> "Hello!",
66
+ "author" -> "maji-KY",
67
+ "date" -> Timestamp.ofEpochSecond(978274800L),
68
+ "date_time" -> Timestamp.ofEpochSecond(978274800L),
69
+ "list" -> new JsonParser().parse("""["a","b","c"]"""),
70
+ "rating_sub" -> 2.5d,
71
+ "released" -> true,
72
+ ),
73
+ Map(
74
+ "id" -> 2L,
75
+ "title" -> "Bonjour!",
76
+ "author" -> "maji-KY",
77
+ "date" -> Timestamp.ofEpochSecond(978274800L),
78
+ "date_time" -> null,
79
+ "list" -> new JsonParser().parse("[]"),
80
+ "rating_sub" -> null,
81
+ "released" -> false,
82
+ )
83
+ ), result)
134
84
  }
135
85
 
136
86
  }
87
+
88
+ class TestTransactionalPageOutput(schema: Schema, result: mutable.Buffer[collection.mutable.Map[String, Any]])
89
+ extends TransactionalPageOutput {
90
+ import org.embulk.spi.PageReader
91
+
92
+ val reader = new PageReader(schema)
93
+
94
+ override def add(page: Page) = {
95
+ reader.setPage(page)
96
+
97
+ while (reader.nextRecord()) {
98
+ val record: collection.mutable.Map[String, Any] = collection.mutable.Map()
99
+
100
+ schema.getColumns().asScala.foreach { column =>
101
+ column.visit(new TestColumnVisitor(reader, record))
102
+ }
103
+ result += record
104
+ }
105
+ }
106
+
107
+ override def commit() = Exec.newTaskReport()
108
+ override def abort() = {}
109
+ override def finish() = {}
110
+ override def close() = {}
111
+ }
112
+
113
+ class TestColumnVisitor(reader: PageReader, record: collection.mutable.Map[String, Any]) extends ColumnVisitor {
114
+ override def timestampColumn(column: Column): Unit = {
115
+ if (reader.isNull(column)) {
116
+ record.put(column.getName, null)
117
+ } else {
118
+ record.put(column.getName, reader.getTimestamp(column))
119
+ }
120
+ }
121
+
122
+ override def stringColumn(column: Column): Unit = {
123
+ if (reader.isNull(column)) {
124
+ record.put(column.getName, null)
125
+ } else {
126
+ record.put(column.getName, reader.getString(column))
127
+ }
128
+ }
129
+
130
+ override def longColumn(column: Column): Unit = {
131
+ if (reader.isNull(column)) {
132
+ record.put(column.getName, null)
133
+ } else {
134
+ record.put(column.getName, reader.getLong(column))
135
+ }
136
+ }
137
+
138
+ override def doubleColumn(column: Column): Unit = {
139
+ if (reader.isNull(column)) {
140
+ record.put(column.getName, null)
141
+ } else {
142
+ record.put(column.getName, reader.getDouble(column))
143
+ }
144
+ }
145
+
146
+ override def booleanColumn(column: Column): Unit = {
147
+ if (reader.isNull(column)) {
148
+ record.put(column.getName, null)
149
+ } else {
150
+ record.put(column.getName, reader.getBoolean(column))
151
+ }
152
+ }
153
+
154
+ override def jsonColumn(column: Column): Unit = {
155
+ if (reader.isNull(column)) {
156
+ record.put(column.getName, null)
157
+ } else {
158
+ record.put(column.getName, reader.getJson(column))
159
+ }
160
+ }
161
+ }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-parser-xpath2
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - maji-KY
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-10-06 00:00:00.000000000 Z
11
+ date: 2017-10-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -46,8 +46,10 @@ extensions: []
46
46
  extra_rdoc_files: []
47
47
  files:
48
48
  - .gitignore
49
+ - .travis.yml
49
50
  - LICENSE
50
51
  - README.md
52
+ - benchmark/src/main/scala/org/embulk/parser/xpath2/ParseBenchmark.scala
51
53
  - build.gradle
52
54
  - build.sbt
53
55
  - gradle/wrapper/gradle-wrapper.jar
@@ -56,6 +58,8 @@ files:
56
58
  - gradlew.bat
57
59
  - lib/embulk/guess/xpath2.rb
58
60
  - lib/embulk/parser/xpath2.rb
61
+ - project/build.properties
62
+ - project/plugins.sbt
59
63
  - src/main/scala/org/embulk/parser/xpath2/LoanPattern.scala
60
64
  - src/main/scala/org/embulk/parser/xpath2/PluginTask.scala
61
65
  - src/main/scala/org/embulk/parser/xpath2/XPath2ParserPlugin.scala
@@ -64,8 +68,8 @@ files:
64
68
  - src/test/resources/data.xml
65
69
  - src/test/scala/org/embulk/parser/xpath2/UnitSpec.scala
66
70
  - src/test/scala/org/embulk/parser/xpath2/XPath2ParserPluginSpec.scala
67
- - classpath/embulk-parser-xpath2-0.0.3.jar
68
- - classpath/scala-library-2.12.3.jar
71
+ - classpath/scala-library-2.12.4.jar
72
+ - classpath/embulk-parser-xpath2-0.0.4.jar
69
73
  homepage: https://github.com/maji-KY/embulk-parser-xpath2
70
74
  licenses:
71
75
  - MIT