embulk-parser-xpath2 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/build.gradle +1 -1
- data/classpath/{embulk-parser-xpath2-0.0.1.jar → embulk-parser-xpath2-0.0.2.jar} +0 -0
- data/src/main/scala/org/embulk/parser/xpath2/XPath2ParserPlugin.scala +20 -7
- data/src/test/resources/data.xml +5 -0
- data/src/test/scala/org/embulk/parser/xpath2/XPath2ParserPluginSpec.scala +5 -3
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d53fe77af6f9aa2bd412603c52395679efcd3ea5
|
4
|
+
data.tar.gz: d65abd7ce1c01795be0360dae44a53e7598e49da
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cc2a9082b36e2270c4dc5269be02dae1ffebb08d848d338deaf4d4d1b2b630ef6285d28d7f6d4e11cf4fe915e1c79f2c03fea2154e3714aec40e2c514ac37f11
|
7
|
+
data.tar.gz: 5a6c07331cdc7ccbc96a4d5fbbb10457be84ddd0d22f7367d09c353f2de9114442699d477cc2bc4f0ce5b46a711e9e6c6747c3bc119e34152dbe9fa1ef6ec2f7
|
data/build.gradle
CHANGED
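data/classpath/{embulk-parser-xpath2-0.0.1.jar → embulk-parser-xpath2-0.0.2.jar}
CHANGED
Binary file
data/src/main/scala/org/embulk/parser/xpath2/XPath2ParserPlugin.scala
CHANGED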
|
@@ -11,6 +11,7 @@ import org.embulk.spi._
|
|
11
11
|
import org.embulk.spi.`type`._
|
12
12
|
import org.embulk.spi.time.TimestampParser
|
13
13
|
import org.embulk.spi.util.FileInputInputStream
|
14
|
+
import org.msgpack.value.{Value, Variable}
|
14
15
|
import org.slf4j.Logger
|
15
16
|
import org.w3c.dom.{Document, Node, NodeList}
|
16
17
|
|
@@ -61,13 +62,8 @@ class XPath2ParserPlugin extends ParserPlugin {
|
|
61
62
|
val rootNodes = rootXPath.evaluate(doc, XPathConstants.NODESET).asInstanceOf[NodeList]
|
62
63
|
(0 until rootNodes.getLength).map(rootNodes.item).foreach { node =>
|
63
64
|
columnXPaths.zipWithIndex.foreach { case (xPath, idx) =>
|
64
|
-
val value: Node = xPath.evaluate(node, XPathConstants.NODE).asInstanceOf[Node]
|
65
65
|
val column = schema.getColumn(idx)
|
66
|
-
if (value == null) {
|
67
|
-
pb.setNull(column)
|
68
|
-
} else {
|
69
|
-
setColumn(pb, column, value.getTextContent, timestampParsers)
|
70
|
-
}
|
66
|
+
handleColumn(pb, node, xPath, column, timestampParsers)
|
71
67
|
}
|
72
68
|
pb.addRecord()
|
73
69
|
}
|
@@ -94,12 +90,29 @@ class XPath2ParserPlugin extends ParserPlugin {
|
|
94
90
|
}
|
95
91
|
}
|
96
92
|
|
93
|
+
def handleColumn(pb: PageBuilder, node: Node, xPath: XPathExpression, column: Column, timestampParsers: Map[String, TimestampParser]): Unit = {
|
94
|
+
if (column.getType.isInstanceOf[JsonType]) {
|
95
|
+
val value: NodeList = xPath.evaluate(node, XPathConstants.NODESET).asInstanceOf[NodeList]
|
96
|
+
val values: Seq[Value] = (0 until value.getLength).map(value.item).map { valueNode =>
|
97
|
+
new Variable().setStringValue(valueNode.getTextContent).asStringValue()
|
98
|
+
}
|
99
|
+
val jsonValue = new Variable().setArrayValue(values.asJava).asArrayValue()
|
100
|
+
pb.setJson(column, jsonValue)
|
101
|
+
} else {
|
102
|
+
val value: Node = xPath.evaluate(node, XPathConstants.NODE).asInstanceOf[Node]
|
103
|
+
if (value == null) {
|
104
|
+
pb.setNull(column)
|
105
|
+
} else {
|
106
|
+
setColumn(pb, column, value.getTextContent, timestampParsers)
|
107
|
+
}
|
108
|
+
}
|
109
|
+
}
|
110
|
+
|
97
111
|
def setColumn(pb: PageBuilder, column: Column, value: String, timestampParsers: Map[String, TimestampParser]): Unit = column.getType match {
|
98
112
|
case _: StringType => pb.setString(column, value)
|
99
113
|
case _: LongType => pb.setLong(column, value.toLong)
|
100
114
|
case _: DoubleType => pb.setDouble(column, value.toDouble)
|
101
115
|
case _: BooleanType => pb.setBoolean(column, value.toBoolean)
|
102
|
-
case _: JsonType => pb.setString(column, value) // treat json as string.
|
103
116
|
case _: TimestampType => pb.setTimestamp(column, timestampParsers(column.getName).parse(value))
|
104
117
|
}
|
105
118
|
|
data/src/test/resources/data.xml
CHANGED
data/src/test/scala/org/embulk/parser/xpath2/XPath2ParserPluginSpec.scala
CHANGED
@@ -6,6 +6,7 @@ import java.util
|
|
6
6
|
import org.embulk.EmbulkTestRuntime
|
7
7
|
import org.embulk.config.{ConfigSource, TaskSource}
|
8
8
|
import org.embulk.spi._
|
9
|
+
import org.embulk.spi.json.JsonParser
|
9
10
|
import org.embulk.spi.time.Timestamp
|
10
11
|
import org.embulk.spi.util.InputStreamFileInput
|
11
12
|
import org.junit.Assert._
|
@@ -31,7 +32,8 @@ class XPath2ParserPluginSpec {
|
|
31
32
|
Map("path" -> "ns2:id", "name" -> "id", "type" -> "long").asJava,
|
32
33
|
Map("path" -> "ns2:title", "name" -> "title", "type" -> "string").asJava,
|
33
34
|
Map("path" -> "ns2:meta/ns2:author", "name" -> "author", "type" -> "string").asJava,
|
34
|
-
Map("path" -> "ns2:date", "name" -> "date", "type" -> "timestamp", "format" -> "%Y%m%d").asJava
|
35
|
+
Map("path" -> "ns2:date", "name" -> "date", "type" -> "timestamp", "format" -> "%Y%m%d").asJava,
|
36
|
+
Map("path" -> "ns2:list/ns2:value", "name" -> "list", "type" -> "json").asJava,
|
35
37
|
).asJava)
|
36
38
|
.set("namespaces", Map[String, String]("ns1" -> "http://example.com/ns1/", "ns2" -> "http://example.com/ns2/").asJava)
|
37
39
|
.set("out", Map[String, String]("type" -> "stdout").asJava)
|
@@ -108,7 +110,7 @@ class XPath2ParserPluginSpec {
|
|
108
110
|
if (reader.isNull(column)) {
|
109
111
|
record.put(column.getName, null)
|
110
112
|
} else {
|
111
|
-
record.put(column.getName, reader.
|
113
|
+
record.put(column.getName, reader.getJson(column))
|
112
114
|
}
|
113
115
|
}
|
114
116
|
})
|
@@ -128,7 +130,7 @@ class XPath2ParserPluginSpec {
|
|
128
130
|
|
129
131
|
println(result)
|
130
132
|
|
131
|
-
assertEquals(ArrayBuffer(Map("date" -> Timestamp.ofEpochSecond(978307200L), "title" -> "Hello!", "author" -> "maji-KY", "id" -> 1L)), result)
|
133
|
+
assertEquals(ArrayBuffer(Map("date" -> Timestamp.ofEpochSecond(978307200L), "list" -> new JsonParser().parse("""["a","b","c"]"""), "title" -> "Hello!", "author" -> "maji-KY", "id" -> 1L)), result)
|
132
134
|
}
|
133
135
|
|
134
136
|
}
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-parser-xpath2
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- maji-KY
|
@@ -64,7 +64,7 @@ files:
|
|
64
64
|
- src/test/resources/data.xml
|
65
65
|
- src/test/scala/org/embulk/parser/xpath2/UnitSpec.scala
|
66
66
|
- src/test/scala/org/embulk/parser/xpath2/XPath2ParserPluginSpec.scala
|
67
|
-
- classpath/embulk-parser-xpath2-0.0.1.jar
|
67
|
+
- classpath/embulk-parser-xpath2-0.0.2.jar
|
68
68
|
- classpath/scala-library-2.12.3.jar
|
69
69
|
homepage: https://github.com/maji-KY/embulk-parser-xpath2
|
70
70
|
licenses:
|