embulk-parser-xpath2 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/build.gradle +1 -1
- data/classpath/{embulk-parser-xpath2-0.0.1.jar → embulk-parser-xpath2-0.0.2.jar} +0 -0
- data/src/main/scala/org/embulk/parser/xpath2/XPath2ParserPlugin.scala +20 -7
- data/src/test/resources/data.xml +5 -0
- data/src/test/scala/org/embulk/parser/xpath2/XPath2ParserPluginSpec.scala +5 -3
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d53fe77af6f9aa2bd412603c52395679efcd3ea5
|
4
|
+
data.tar.gz: d65abd7ce1c01795be0360dae44a53e7598e49da
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cc2a9082b36e2270c4dc5269be02dae1ffebb08d848d338deaf4d4d1b2b630ef6285d28d7f6d4e11cf4fe915e1c79f2c03fea2154e3714aec40e2c514ac37f11
|
7
|
+
data.tar.gz: 5a6c07331cdc7ccbc96a4d5fbbb10457be84ddd0d22f7367d09c353f2de9114442699d477cc2bc4f0ce5b46a711e9e6c6747c3bc119e34152dbe9fa1ef6ec2f7
|
data/build.gradle
CHANGED
Binary file
|
@@ -11,6 +11,7 @@ import org.embulk.spi._
|
|
11
11
|
import org.embulk.spi.`type`._
|
12
12
|
import org.embulk.spi.time.TimestampParser
|
13
13
|
import org.embulk.spi.util.FileInputInputStream
|
14
|
+
import org.msgpack.value.{Value, Variable}
|
14
15
|
import org.slf4j.Logger
|
15
16
|
import org.w3c.dom.{Document, Node, NodeList}
|
16
17
|
|
@@ -61,13 +62,8 @@ class XPath2ParserPlugin extends ParserPlugin {
|
|
61
62
|
val rootNodes = rootXPath.evaluate(doc, XPathConstants.NODESET).asInstanceOf[NodeList]
|
62
63
|
(0 until rootNodes.getLength).map(rootNodes.item).foreach { node =>
|
63
64
|
columnXPaths.zipWithIndex.foreach { case (xPath, idx) =>
|
64
|
-
val value: Node = xPath.evaluate(node, XPathConstants.NODE).asInstanceOf[Node]
|
65
65
|
val column = schema.getColumn(idx)
|
66
|
-
if (value == null) {
|
67
|
-
pb.setNull(column)
|
68
|
-
} else {
|
69
|
-
setColumn(pb, column, value.getTextContent, timestampParsers)
|
70
|
-
}
|
66
|
+
handleColumn(pb, node, xPath, column, timestampParsers)
|
71
67
|
}
|
72
68
|
pb.addRecord()
|
73
69
|
}
|
@@ -94,12 +90,29 @@ class XPath2ParserPlugin extends ParserPlugin {
|
|
94
90
|
}
|
95
91
|
}
|
96
92
|
|
93
|
+
def handleColumn(pb: PageBuilder, node: Node, xPath: XPathExpression, column: Column, timestampParsers: Map[String, TimestampParser]): Unit = {
|
94
|
+
if (column.getType.isInstanceOf[JsonType]) {
|
95
|
+
val value: NodeList = xPath.evaluate(node, XPathConstants.NODESET).asInstanceOf[NodeList]
|
96
|
+
val values: Seq[Value] = (0 until value.getLength).map(value.item).map { valueNode =>
|
97
|
+
new Variable().setStringValue(valueNode.getTextContent).asStringValue()
|
98
|
+
}
|
99
|
+
val jsonValue = new Variable().setArrayValue(values.asJava).asArrayValue()
|
100
|
+
pb.setJson(column, jsonValue)
|
101
|
+
} else {
|
102
|
+
val value: Node = xPath.evaluate(node, XPathConstants.NODE).asInstanceOf[Node]
|
103
|
+
if (value == null) {
|
104
|
+
pb.setNull(column)
|
105
|
+
} else {
|
106
|
+
setColumn(pb, column, value.getTextContent, timestampParsers)
|
107
|
+
}
|
108
|
+
}
|
109
|
+
}
|
110
|
+
|
97
111
|
def setColumn(pb: PageBuilder, column: Column, value: String, timestampParsers: Map[String, TimestampParser]): Unit = column.getType match {
|
98
112
|
case _: StringType => pb.setString(column, value)
|
99
113
|
case _: LongType => pb.setLong(column, value.toLong)
|
100
114
|
case _: DoubleType => pb.setDouble(column, value.toDouble)
|
101
115
|
case _: BooleanType => pb.setBoolean(column, value.toBoolean)
|
102
|
-
case _: JsonType => pb.setString(column, value) // treat json as string.
|
103
116
|
case _: TimestampType => pb.setTimestamp(column, timestampParsers(column.getName).parse(value))
|
104
117
|
}
|
105
118
|
|
data/src/test/resources/data.xml
CHANGED
@@ -6,6 +6,7 @@ import java.util
|
|
6
6
|
import org.embulk.EmbulkTestRuntime
|
7
7
|
import org.embulk.config.{ConfigSource, TaskSource}
|
8
8
|
import org.embulk.spi._
|
9
|
+
import org.embulk.spi.json.JsonParser
|
9
10
|
import org.embulk.spi.time.Timestamp
|
10
11
|
import org.embulk.spi.util.InputStreamFileInput
|
11
12
|
import org.junit.Assert._
|
@@ -31,7 +32,8 @@ class XPath2ParserPluginSpec {
|
|
31
32
|
Map("path" -> "ns2:id", "name" -> "id", "type" -> "long").asJava,
|
32
33
|
Map("path" -> "ns2:title", "name" -> "title", "type" -> "string").asJava,
|
33
34
|
Map("path" -> "ns2:meta/ns2:author", "name" -> "author", "type" -> "string").asJava,
|
34
|
-
Map("path" -> "ns2:date", "name" -> "date", "type" -> "timestamp", "format" -> "%Y%m%d").asJava
|
35
|
+
Map("path" -> "ns2:date", "name" -> "date", "type" -> "timestamp", "format" -> "%Y%m%d").asJava,
|
36
|
+
Map("path" -> "ns2:list/ns2:value", "name" -> "list", "type" -> "json").asJava,
|
35
37
|
).asJava)
|
36
38
|
.set("namespaces", Map[String, String]("ns1" -> "http://example.com/ns1/", "ns2" -> "http://example.com/ns2/").asJava)
|
37
39
|
.set("out", Map[String, String]("type" -> "stdout").asJava)
|
@@ -108,7 +110,7 @@ class XPath2ParserPluginSpec {
|
|
108
110
|
if (reader.isNull(column)) {
|
109
111
|
record.put(column.getName, null)
|
110
112
|
} else {
|
111
|
-
record.put(column.getName, reader.
|
113
|
+
record.put(column.getName, reader.getJson(column))
|
112
114
|
}
|
113
115
|
}
|
114
116
|
})
|
@@ -128,7 +130,7 @@ class XPath2ParserPluginSpec {
|
|
128
130
|
|
129
131
|
println(result)
|
130
132
|
|
131
|
-
assertEquals(ArrayBuffer(Map("date" -> Timestamp.ofEpochSecond(978307200L), "title" -> "Hello!", "author" -> "maji-KY", "id" -> 1L)), result)
|
133
|
+
assertEquals(ArrayBuffer(Map("date" -> Timestamp.ofEpochSecond(978307200L), "list" -> new JsonParser().parse("""["a","b","c"]"""), "title" -> "Hello!", "author" -> "maji-KY", "id" -> 1L)), result)
|
132
134
|
}
|
133
135
|
|
134
136
|
}
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-parser-xpath2
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- maji-KY
|
@@ -64,7 +64,7 @@ files:
|
|
64
64
|
- src/test/resources/data.xml
|
65
65
|
- src/test/scala/org/embulk/parser/xpath2/UnitSpec.scala
|
66
66
|
- src/test/scala/org/embulk/parser/xpath2/XPath2ParserPluginSpec.scala
|
67
|
-
- classpath/embulk-parser-xpath2-0.0.1.jar
|
67
|
+
- classpath/embulk-parser-xpath2-0.0.2.jar
|
68
68
|
- classpath/scala-library-2.12.3.jar
|
69
69
|
homepage: https://github.com/maji-KY/embulk-parser-xpath2
|
70
70
|
licenses:
|