embulk-parser-xpath2 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e137d05fc96ea14274af6132b979b748f179f13f
4
- data.tar.gz: b0c73e40ec2c784d6d3c74e4ad034e87c145163c
3
+ metadata.gz: d53fe77af6f9aa2bd412603c52395679efcd3ea5
4
+ data.tar.gz: d65abd7ce1c01795be0360dae44a53e7598e49da
5
5
  SHA512:
6
- metadata.gz: 646bc1169d84714eee9908bf20963a625d387b1145f9fcb57c32a1257a92b977373a848b2e3657d206714458a8d17c0bb74489e6b07d103329f4a3b6eefb1810
7
- data.tar.gz: 4e41dca4a70dd30489e689d678656911292dcf40d90cdf2a1332149ba964df11c9cedd06e3b49eb28428b5f31021f5284e28b6b64ee3e97894ee970c73d69891
6
+ metadata.gz: cc2a9082b36e2270c4dc5269be02dae1ffebb08d848d338deaf4d4d1b2b630ef6285d28d7f6d4e11cf4fe915e1c79f2c03fea2154e3714aec40e2c514ac37f11
7
+ data.tar.gz: 5a6c07331cdc7ccbc96a4d5fbbb10457be84ddd0d22f7367d09c353f2de9114442699d477cc2bc4f0ce5b46a711e9e6c6747c3bc119e34152dbe9fa1ef6ec2f7
data/build.gradle CHANGED
@@ -13,7 +13,7 @@ configurations {
13
13
  provided
14
14
  }
15
15
 
16
- version = "0.0.1"
16
+ version = "0.0.2"
17
17
 
18
18
  sourceCompatibility = 1.8
19
19
  targetCompatibility = 1.8
@@ -11,6 +11,7 @@ import org.embulk.spi._
11
11
  import org.embulk.spi.`type`._
12
12
  import org.embulk.spi.time.TimestampParser
13
13
  import org.embulk.spi.util.FileInputInputStream
14
+ import org.msgpack.value.{Value, Variable}
14
15
  import org.slf4j.Logger
15
16
  import org.w3c.dom.{Document, Node, NodeList}
16
17
 
@@ -61,13 +62,8 @@ class XPath2ParserPlugin extends ParserPlugin {
61
62
  val rootNodes = rootXPath.evaluate(doc, XPathConstants.NODESET).asInstanceOf[NodeList]
62
63
  (0 until rootNodes.getLength).map(rootNodes.item).foreach { node =>
63
64
  columnXPaths.zipWithIndex.foreach { case (xPath, idx) =>
64
- val value: Node = xPath.evaluate(node, XPathConstants.NODE).asInstanceOf[Node]
65
65
  val column = schema.getColumn(idx)
66
- if (value == null) {
67
- pb.setNull(column)
68
- } else {
69
- setColumn(pb, column, value.getTextContent, timestampParsers)
70
- }
66
+ handleColumn(pb, node, xPath, column, timestampParsers)
71
67
  }
72
68
  pb.addRecord()
73
69
  }
@@ -94,12 +90,29 @@ class XPath2ParserPlugin extends ParserPlugin {
94
90
  }
95
91
  }
96
92
 
93
+ def handleColumn(pb: PageBuilder, node: Node, xPath: XPathExpression, column: Column, timestampParsers: Map[String, TimestampParser]): Unit = {
94
+ if (column.getType.isInstanceOf[JsonType]) {
95
+ val value: NodeList = xPath.evaluate(node, XPathConstants.NODESET).asInstanceOf[NodeList]
96
+ val values: Seq[Value] = (0 until value.getLength).map(value.item).map { valueNode =>
97
+ new Variable().setStringValue(valueNode.getTextContent).asStringValue()
98
+ }
99
+ val jsonValue = new Variable().setArrayValue(values.asJava).asArrayValue()
100
+ pb.setJson(column, jsonValue)
101
+ } else {
102
+ val value: Node = xPath.evaluate(node, XPathConstants.NODE).asInstanceOf[Node]
103
+ if (value == null) {
104
+ pb.setNull(column)
105
+ } else {
106
+ setColumn(pb, column, value.getTextContent, timestampParsers)
107
+ }
108
+ }
109
+ }
110
+
97
111
  def setColumn(pb: PageBuilder, column: Column, value: String, timestampParsers: Map[String, TimestampParser]): Unit = column.getType match {
98
112
  case _: StringType => pb.setString(column, value)
99
113
  case _: LongType => pb.setLong(column, value.toLong)
100
114
  case _: DoubleType => pb.setDouble(column, value.toDouble)
101
115
  case _: BooleanType => pb.setBoolean(column, value.toBoolean)
102
- case _: JsonType => pb.setString(column, value) // treat json as string.
103
116
  case _: TimestampType => pb.setTimestamp(column, timestampParsers(column.getName).parse(value))
104
117
  }
105
118
 
@@ -9,5 +9,10 @@
9
9
  <ns2:author>maji-KY</ns2:author>
10
10
  </ns2:meta>
11
11
  <ns2:date>20010101</ns2:date>
12
+ <ns2:list>
13
+ <ns2:value>a</ns2:value>
14
+ <ns2:value>b</ns2:value>
15
+ <ns2:value>c</ns2:value>
16
+ </ns2:list>
12
17
  </ns2:entry>
13
18
  </ns1:root>
@@ -6,6 +6,7 @@ import java.util
6
6
  import org.embulk.EmbulkTestRuntime
7
7
  import org.embulk.config.{ConfigSource, TaskSource}
8
8
  import org.embulk.spi._
9
+ import org.embulk.spi.json.JsonParser
9
10
  import org.embulk.spi.time.Timestamp
10
11
  import org.embulk.spi.util.InputStreamFileInput
11
12
  import org.junit.Assert._
@@ -31,7 +32,8 @@ class XPath2ParserPluginSpec {
31
32
  Map("path" -> "ns2:id", "name" -> "id", "type" -> "long").asJava,
32
33
  Map("path" -> "ns2:title", "name" -> "title", "type" -> "string").asJava,
33
34
  Map("path" -> "ns2:meta/ns2:author", "name" -> "author", "type" -> "string").asJava,
34
- Map("path" -> "ns2:date", "name" -> "date", "type" -> "timestamp", "format" -> "%Y%m%d").asJava
35
+ Map("path" -> "ns2:date", "name" -> "date", "type" -> "timestamp", "format" -> "%Y%m%d").asJava,
36
+ Map("path" -> "ns2:list/ns2:value", "name" -> "list", "type" -> "json").asJava,
35
37
  ).asJava)
36
38
  .set("namespaces", Map[String, String]("ns1" -> "http://example.com/ns1/", "ns2" -> "http://example.com/ns2/").asJava)
37
39
  .set("out", Map[String, String]("type" -> "stdout").asJava)
@@ -108,7 +110,7 @@ class XPath2ParserPluginSpec {
108
110
  if (reader.isNull(column)) {
109
111
  record.put(column.getName, null)
110
112
  } else {
111
- record.put(column.getName, reader.getString(column))
113
+ record.put(column.getName, reader.getJson(column))
112
114
  }
113
115
  }
114
116
  })
@@ -128,7 +130,7 @@ class XPath2ParserPluginSpec {
128
130
 
129
131
  println(result)
130
132
 
131
- assertEquals(ArrayBuffer(Map("date" -> Timestamp.ofEpochSecond(978307200L), "title" -> "Hello!", "author" -> "maji-KY", "id" -> 1L)), result)
133
+ assertEquals(ArrayBuffer(Map("date" -> Timestamp.ofEpochSecond(978307200L), "list" -> new JsonParser().parse("""["a","b","c"]"""), "title" -> "Hello!", "author" -> "maji-KY", "id" -> 1L)), result)
132
134
  }
133
135
 
134
136
  }
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-parser-xpath2
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - maji-KY
@@ -64,7 +64,7 @@ files:
64
64
  - src/test/resources/data.xml
65
65
  - src/test/scala/org/embulk/parser/xpath2/UnitSpec.scala
66
66
  - src/test/scala/org/embulk/parser/xpath2/XPath2ParserPluginSpec.scala
67
- - classpath/embulk-parser-xpath2-0.0.1.jar
67
+ - classpath/embulk-parser-xpath2-0.0.2.jar
68
68
  - classpath/scala-library-2.12.3.jar
69
69
  homepage: https://github.com/maji-KY/embulk-parser-xpath2
70
70
  licenses: