RubyGems - xml_data_extractor - Versions diffs - 0.5.0 → 0.6.0 - Mend

xml_data_extractor 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

checksums.yaml +4 -4
data/.editorconfig +8 -0
data/.github/workflows/ci.yml +5 -5
data/.gitignore +1 -0
data/Gemfile.lock +23 -23
data/README.md +290 -1
data/lib/src/extract/array_of.rb +2 -0
data/lib/src/extract/array_value.rb +2 -0
data/lib/src/extract/base.rb +2 -0
data/lib/src/extract/expression.rb +2 -0
data/lib/src/extract/hash_builder.rb +2 -0
data/lib/src/extract/string_value.rb +2 -0
data/lib/src/extract/unescape.rb +2 -0
data/lib/src/extract/value_builder.rb +3 -1
data/lib/src/extract/within.rb +2 -0
data/lib/src/extractor.rb +3 -1
data/lib/src/format/formatter.rb +2 -0
data/lib/src/format/mapper.rb +2 -0
data/lib/src/format/modifier.rb +2 -0
data/lib/src/node.rb +2 -0
data/xml_data_extractor.gemspec +2 -2
metadata +5 -5
data/.travis.yml +0 -6

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: b8705267e58445a552f2821a726e431ca93259e0782be80f2d47c2bcf1603aad
-  data.tar.gz: bd59596a4a0d554b055920a09891efeecc8589532b7d02e9e7b4715c54294eee
+  metadata.gz: 334896bd178759618062d648f74af638a88fde5c5cccfaf255279589207670a6
+  data.tar.gz: 6b85212f452f62bfa75a97c66f76c889cc39382d726d26b93a05800eb69e6dbe
 SHA512:
-  metadata.gz: 736fa4908c03918c71417b73e519fcfc658f65a1d7b4b2762ff00e73854e4f0a6f7495963ae68081403e52b50a26aae5dc9393dad66df9f366bd89c4e9af6ba9
-  data.tar.gz: 4ae406899d4538f2f92ec470502b1905a80bbda13273521a2b8d0d94207401cd9c0495ff20bc4c69351d2f4dc34aa21b4606504f716e019148734fe3d45f4f49
+  metadata.gz: 6899c3dafed6462fcb816edbe341a33e7a7388b2f3ba2724d5c2e0bab190f7ce00256c8bda35fbeaed7eaeb58ccfad8c3597c94a1e332ef7e5c125efd6a50924
+  data.tar.gz: 1b0b37b90adba98c9b2085d6a300473dca39bcafd4c801c58a5877ae57ad518be8d6079d0c61725f73809779ea3ace7590e4258494e2d1e0a61b7e914f8e5f69

data/.editorconfig ADDED Viewed

@@ -0,0 +1,8 @@
+[*]
+end_of_line = lf
+[*.{js,rb,yml,json,css,scss,html,erb,rake,slim}]
+insert_final_newline = true
+indent_style = space
+indent_size = 2
+trim_trailing_whitespace = true

data/.github/workflows/ci.yml CHANGED Viewed

@@ -1,6 +1,6 @@
 name: ci
-on:
-  push:
+on: [push, pull_request]
 jobs:
   qa:
@@ -8,7 +8,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout code
-        uses: actions/checkout@v2.3.4
+        uses: actions/checkout@v3
       - name: Freeze autogenerated files
         run: |
@@ -18,7 +18,7 @@ jobs:
         uses: ruby/setup-ruby@v1
       - name: Cache Ruby Dependencies
-        uses: actions/cache@v2.1.4
+        uses: actions/cache@v3
         with:
           path: vendor/bundle
           key: ${{ runner.os }}-gem-${{ hashFiles('.ruby-version') }}-${{ hashFiles('**/Gemfile.lock') }}
@@ -28,4 +28,4 @@ jobs:
       - name: Run tests
         run: |
-          bin/rspec
+          bin/rspec

data/.gitignore CHANGED Viewed

@@ -6,6 +6,7 @@
 /pkg/
 /spec/reports/
 /tmp/
+*.gem
 # rspec failure tracking
 .rspec_status

data/Gemfile.lock CHANGED Viewed

@@ -1,46 +1,46 @@
 PATH
   remote: .
   specs:
-    xml_data_extractor (0.4.0)
+    xml_data_extractor (0.6.0)
       activesupport (~> 6.0)
       nokogiri (~> 1.0)
 GEM
   remote: https://rubygems.org/
   specs:
-    activesupport (6.1.3)
+    activesupport (6.1.5)
       concurrent-ruby (~> 1.0, >= 1.0.2)
       i18n (>= 1.6, < 2)
       minitest (>= 5.1)
       tzinfo (~> 2.0)
       zeitwerk (~> 2.3)
-    concurrent-ruby (1.1.8)
-    diff-lcs (1.4.4)
-    i18n (1.8.9)
+    concurrent-ruby (1.1.10)
+    diff-lcs (1.5.0)
+    i18n (1.10.0)
       concurrent-ruby (~> 1.0)
-    mini_portile2 (2.5.0)
-    minitest (5.14.3)
-    nokogiri (1.11.2)
-      mini_portile2 (~> 2.5.0)
+    mini_portile2 (2.8.0)
+    minitest (5.15.0)
+    nokogiri (1.13.3)
+      mini_portile2 (~> 2.8.0)
       racc (~> 1.4)
-    racc (1.5.2)
+    racc (1.6.0)
     rake (12.3.3)
-    rspec (3.10.0)
-      rspec-core (~> 3.10.0)
-      rspec-expectations (~> 3.10.0)
-      rspec-mocks (~> 3.10.0)
-    rspec-core (3.10.1)
-      rspec-support (~> 3.10.0)
-    rspec-expectations (3.10.1)
+    rspec (3.11.0)
+      rspec-core (~> 3.11.0)
+      rspec-expectations (~> 3.11.0)
+      rspec-mocks (~> 3.11.0)
+    rspec-core (3.11.0)
+      rspec-support (~> 3.11.0)
+    rspec-expectations (3.11.0)
       diff-lcs (>= 1.2.0, < 2.0)
-      rspec-support (~> 3.10.0)
-    rspec-mocks (3.10.1)
+      rspec-support (~> 3.11.0)
+    rspec-mocks (3.11.0)
       diff-lcs (>= 1.2.0, < 2.0)
-      rspec-support (~> 3.10.0)
-    rspec-support (3.10.1)
+      rspec-support (~> 3.11.0)
+    rspec-support (3.11.0)
     tzinfo (2.0.4)
       concurrent-ruby (~> 1.0)
-    zeitwerk (2.4.2)
+    zeitwerk (2.5.4)
 PLATFORMS
   ruby
@@ -51,4 +51,4 @@ DEPENDENCIES
   xml_data_extractor!
 BUNDLED WITH
-   2.1.4
+   2.2.6

data/README.md CHANGED Viewed

@@ -300,6 +300,295 @@ schemas:
 }
 ```
+### link
+This command is useful when the XML contains references to other nodes; it works like a SQL JOIN. The path must be an expression containing the `<link>` identifier, which will be replaced by the value fetched from the `link:` command.
+Example:
+```yml
+schemas:
+  bookings:
+    array_of: booking
+    date: booking_date
+    document: id
+    products:
+      array_of:
+      accomodation:
+        path: ../hotel[booking_id=<link>]/accomodation
+        link: id
+```
+```xml
+<xml>
+  <booking>
+    <id>1</id>
+    <booking_date>2020-01-01</booking_date>
+  </booking>
+  <booking>
+    <id>2</id>
+    <booking_date>2020-01-02</booking_date>
+  </booking>
+  <hotel>
+    <booking_id>1</booking_id>
+    <accomodation>Standard</accomodation>
+  </hotel>
+  <hotel>
+    <booking_id>2</booking_id>
+    <accomodation>Premium</accomodation>
+  </hotel>
+</xml>
+```
+```ruby
+{
+  bookings: [
+    {
+      date: "2020-01-01",
+      document: "1",
+      products: [
+        { accomodation: "Standard" }
+      ]
+    },
+    {
+      date: "2020-01-02",
+      document: "2",
+      products: [
+        { accomodation: "Premium" }
+      ]
+    }
+  ]
+}
+```
+In this example, if I didn't use `link` to get only the hotel of each booking, it would have returned two accomodations for each booking, and instead of extracting a string with the accomodation it would extract an array with all the accomodations for each booking.
+You can combine `link` with `array_of` if you want to search for a list of elements filtered by some field; just provide the `path` and the `link`:
+```yml
+schemas:
+  bookings:
+    array_of: booking
+    date: date
+    document: id
+    products:
+      array_of:
+        path: ../products[booking_id=<link>]
+        link: id
+      ....
+```
+### uniq_by
+Can only be used with **array_of**.
+This functionality is useful when some XML nodes are duplicated and you want to extract data from the first occurrence only. It has a behavior similar to Ruby **uniq** method on arrays.
+For each path generated from `array_of`, the value fetched using `uniq_by` will be checked against the generated collection and the path will be discarded if the value already exists.
+```yml
+schemas:
+  bookings:
+    array_of:
+      path: booking
+      uniq_by: id
+    date: bdate
+    document: id
+```
+```xml
+<xml>
+  <booking>
+    <id>1</id>
+    <bdate>2020-01-01</bdate>
+  </booking>
+  <booking>
+    <id>1</id>
+    <bdate>2020-01-01</bdate>
+  </booking>
+</xml>
+```
+```ruby
+{
+  bookings: [
+    {
+      date: "2020-01-01",
+      document: "1"
+    }
+  ]
+}
+```
+In this example, if we didn't use the `uniq_by` tag, two elements with the same data would be extracted, like:
+```ruby
+{
+  bookings: [
+    {
+      date: "2020-01-01",
+      document: "1"
+    },
+    {
+      date: "2020-01-01",
+      document: "1"
+    }
+  ]
+}
+```
+### array_presence: first_only
+The field that contains this property will only be added to the first item of the array.
+Can only be used in fields that belong to a node of `array_of`.
+```yml
+passengers:
+  array_of: bookings/booking/passengers/passenger
+  id:
+    path: document
+    modifier: to_s
+  name:
+    attr: [FirstName, LastName]
+    modifier:
+      - name: join
+        params: [" "]
+  rav_tax:
+    array_presence: first_only
+    path: ../rav
+    modifier: to_f
+```
+```xml
+<bookings>
+  <booking>
+    <rav>150</rav>
+    <passengers>
+      <passenger>
+        <document>109.111.019-79</document>
+        <FirstName>Marcelo</FirstName>
+        <LastName>Lauxen</LastName>
+      </passenger>
+      <passenger>
+        <document>110.155.019-78</document>
+        <FirstName>Corona</FirstName>
+        <LastName>Virus</LastName>
+      </passenger>
+    </passengers>
+  </booking>
+</bookings>
+```
+```ruby
+{
+  bookings: [
+    {
+      passengers: [
+        {
+          id: "109.111.019-79",
+          name: "Marcelo Lauxen",
+          rav_tax: 150.00
+        },
+        {
+          id: "110.155.019-78",
+          name: "Corona Virus"
+        }
+      ]
+    }
+  ]
+}
+```
+In this example the field `rav_tax` was only included on the first passenger because this field has the `array_presence: first_only` property.
+### in_parent
+This option allows you to navigate to a parent node of the current node.
+```yml
+passengers:
+  array_of: bookings/booking/passengers/passenger
+  id:
+    path: document
+    modifier: to_s
+  bookings_id:
+    in_parent: bookings
+    path: id
+```
+```xml
+<bookings>
+  <bookings_id>8888</bookings_id>
+  <booking>
+    <passengers>
+      <passenger>
+        <document>109.111.019-79</document>
+      </passenger>
+      <passenger>
+        <document>110.155.019-78</document>
+      </passenger>
+    </passengers>
+  </booking>
+</bookings>
+```
+```ruby
+{
+  bookings: [
+    {
+      passengers: [
+        {
+          id: "109.111.019-79",
+          bookings_id: 8888
+        },
+        {
+          id: "110.155.019-78",
+          bookings_id: 8888
+        }
+      ]
+    }
+  ]
+}
+```
+In this example the value of `bookings_id` will be extracted starting at the node provided in `in_parent` instead of the current node. It's possible to navigate to a parent node with `../` too (xpath provides this functionality), but with `in_parent` you just need to provide the name of the parent node: it will navigate up until the parent node is found, no matter how many levels.
+### keep_if
+This option allows you to keep a block of the hash in the final result only if the condition matches.
+```yml
+schemas:
+  dummy:
+    within: data
+    description: additional_desc
+    exchange: currency_info/value
+    price: price
+    payment:
+      type: payment_info/method
+      value: payment_info/price
+      keep_if: "'type' == 'invoice'"
+```
+```xml
+<data>
+  <additional_desc>Keep walking</additional_desc>
+  <currency_info kind="USD">
+    <value>4.15</value>
+  </currency_info>
+  <price>55.09</price>
+  <payment_info>
+    <method>card</method>
+    <price>55.48</price>
+    <payment>
+      <installments>2</installments>
+      <card_number>333</card_number>
+    </payment>
+  </payment_info>
+</data>
+```
+```ruby
+{
+  dummy: {
+    description: "Keep walking",
+    exchange: "4.15",
+    price: "55.09"
+  }
+}
+```
+In this example the condition didn't match since the payment method was `card` instead of `invoice` and then the extracted payment hash was removed from the final result.
 ### Formatting:
 #### fixed
@@ -379,7 +668,7 @@ schemas:
     path: [firstname, lastname]
     modifier:
       - name: join
-        params: [" "]
+        params: [" "]
       - downcase
 ```
 ```xml

data/lib/src/extract/array_of.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 module Extract
   class ArrayOf < Base
     def initialize(node, extractor, index = 0)

data/lib/src/extract/array_value.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 module Extract
   class ArrayValue < Base
     def value

data/lib/src/extract/base.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 module Extract
   class Base
     def initialize(node, extractor)

data/lib/src/extract/expression.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 module Extract
   class Expression
     def initialize(expression, hash)

data/lib/src/extract/hash_builder.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 module Extract
   class HashBuilder < Base
     INTERNAL_FIELDS = %i[array_of keep_if within unescape].freeze

data/lib/src/extract/string_value.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 module Extract
   class StringValue < Base
     def value

data/lib/src/extract/unescape.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 module Extract
   class Unescape < Base
     def unescape!

data/lib/src/extract/value_builder.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 require_relative "base"
 require_relative "array_value"
 require_relative "array_of"
@@ -25,7 +27,7 @@ module Extract
     def value_for_hash
       props = node.props
       Unescape.new(node, extractor).unescape! if props[:unescape]
       fixed_value = props[:fixed]

data/lib/src/extract/within.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 module Extract
   class Within < Base
     def value

data/lib/src/extractor.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 require "cgi"
 require "active_support/core_ext/string"
 require_relative "format/formatter"
@@ -37,7 +39,7 @@ class PathBuilder < Struct.new(:base, :parent, :tag, keyword_init: true)
   end
   def matching_tags?(item, tag)
-    item.gsub(/\[\d\]/, "") == tag
+    item.gsub(/\[\d+\]/, "") == tag
   end
 end

data/lib/src/format/formatter.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 require_relative "mapper"
 require_relative "modifier"

data/lib/src/format/mapper.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 module Format
   class Mapper
     def initialize(yml)

data/lib/src/format/modifier.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 module Format
   class Modifier
     def initialize(yml, helper)

data/lib/src/node.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 class Node < Struct.new(:props, :path)
   def initialize(*)
     super

data/xml_data_extractor.gemspec CHANGED Viewed

@@ -1,13 +1,13 @@
 Gem::Specification.new do |spec|
   spec.name          = "xml_data_extractor"
-  spec.version       = "0.5.0"
+  spec.version       = "0.6.0"
   spec.authors       = ["Fernando Almeida"]
   spec.email         = ["fernandoprsbr@gmail.com"]
   spec.summary       = "Provides a simples DSL for extracting data from XML documents"
   spec.homepage      = "https://github.com/monde-sistemas/xml_data_extractor"
   spec.license       = "MIT"
-  spec.required_ruby_version = Gem::Requirement.new(">= 2.3.0")
+  spec.required_ruby_version = Gem::Requirement.new(">= 2.5")
   spec.metadata["homepage_uri"] = spec.homepage
   spec.metadata["source_code_uri"] = spec.homepage

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: xml_data_extractor
 version: !ruby/object:Gem::Version
-  version: 0.5.0
+  version: 0.6.0
 platform: ruby
 authors:
 - Fernando Almeida
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2021-03-12 00:00:00.000000000 Z
+date: 2022-04-04 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: nokogiri
@@ -73,13 +73,13 @@ executables: []
 extensions: []
 extra_rdoc_files: []
 files:
+- ".editorconfig"
 - ".github/dependabot.yml"
 - ".github/workflows/ci.yml"
 - ".gitignore"
 - ".kodiak.toml"
 - ".rspec"
 - ".ruby-version"
-- ".travis.yml"
 - Gemfile
 - Gemfile.lock
 - LICENSE.txt
@@ -119,14 +119,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
-      version: 2.3.0
+      version: '2.5'
 required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.2.3
+rubygems_version: 3.2.25
 signing_key:
 specification_version: 4
 summary: Provides a simples DSL for extracting data from XML documents

data/.travis.yml DELETED Viewed

@@ -1,6 +0,0 @@
----
-language: ruby
-cache: bundler
-rvm:
-  - 2.6.6
-before_install: gem install bundler -v 2.1.4