xml_data_extractor 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b8705267e58445a552f2821a726e431ca93259e0782be80f2d47c2bcf1603aad
4
- data.tar.gz: bd59596a4a0d554b055920a09891efeecc8589532b7d02e9e7b4715c54294eee
3
+ metadata.gz: 334896bd178759618062d648f74af638a88fde5c5cccfaf255279589207670a6
4
+ data.tar.gz: 6b85212f452f62bfa75a97c66f76c889cc39382d726d26b93a05800eb69e6dbe
5
5
  SHA512:
6
- metadata.gz: 736fa4908c03918c71417b73e519fcfc658f65a1d7b4b2762ff00e73854e4f0a6f7495963ae68081403e52b50a26aae5dc9393dad66df9f366bd89c4e9af6ba9
7
- data.tar.gz: 4ae406899d4538f2f92ec470502b1905a80bbda13273521a2b8d0d94207401cd9c0495ff20bc4c69351d2f4dc34aa21b4606504f716e019148734fe3d45f4f49
6
+ metadata.gz: 6899c3dafed6462fcb816edbe341a33e7a7388b2f3ba2724d5c2e0bab190f7ce00256c8bda35fbeaed7eaeb58ccfad8c3597c94a1e332ef7e5c125efd6a50924
7
+ data.tar.gz: 1b0b37b90adba98c9b2085d6a300473dca39bcafd4c801c58a5877ae57ad518be8d6079d0c61725f73809779ea3ace7590e4258494e2d1e0a61b7e914f8e5f69
data/.editorconfig ADDED
@@ -0,0 +1,8 @@
1
+ [*]
2
+ end_of_line = lf
3
+
4
+ [*.{js,rb,yml,json,css,scss,html,erb,rake,slim}]
5
+ insert_final_newline = true
6
+ indent_style = space
7
+ indent_size = 2
8
+ trim_trailing_whitespace = true
@@ -1,6 +1,6 @@
1
1
  name: ci
2
- on:
3
- push:
2
+
3
+ on: [push, pull_request]
4
4
 
5
5
  jobs:
6
6
  qa:
@@ -8,7 +8,7 @@ jobs:
8
8
  runs-on: ubuntu-latest
9
9
  steps:
10
10
  - name: Checkout code
11
- uses: actions/checkout@v2.3.4
11
+ uses: actions/checkout@v3
12
12
 
13
13
  - name: Freeze autogenerated files
14
14
  run: |
@@ -18,7 +18,7 @@ jobs:
18
18
  uses: ruby/setup-ruby@v1
19
19
 
20
20
  - name: Cache Ruby Dependencies
21
- uses: actions/cache@v2.1.4
21
+ uses: actions/cache@v3
22
22
  with:
23
23
  path: vendor/bundle
24
24
  key: ${{ runner.os }}-gem-${{ hashFiles('.ruby-version') }}-${{ hashFiles('**/Gemfile.lock') }}
@@ -28,4 +28,4 @@ jobs:
28
28
 
29
29
  - name: Run tests
30
30
  run: |
31
- bin/rspec
31
+ bin/rspec
data/.gitignore CHANGED
@@ -6,6 +6,7 @@
6
6
  /pkg/
7
7
  /spec/reports/
8
8
  /tmp/
9
+ *.gem
9
10
 
10
11
  # rspec failure tracking
11
12
  .rspec_status
data/Gemfile.lock CHANGED
@@ -1,46 +1,46 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- xml_data_extractor (0.4.0)
4
+ xml_data_extractor (0.6.0)
5
5
  activesupport (~> 6.0)
6
6
  nokogiri (~> 1.0)
7
7
 
8
8
  GEM
9
9
  remote: https://rubygems.org/
10
10
  specs:
11
- activesupport (6.1.3)
11
+ activesupport (6.1.5)
12
12
  concurrent-ruby (~> 1.0, >= 1.0.2)
13
13
  i18n (>= 1.6, < 2)
14
14
  minitest (>= 5.1)
15
15
  tzinfo (~> 2.0)
16
16
  zeitwerk (~> 2.3)
17
- concurrent-ruby (1.1.8)
18
- diff-lcs (1.4.4)
19
- i18n (1.8.9)
17
+ concurrent-ruby (1.1.10)
18
+ diff-lcs (1.5.0)
19
+ i18n (1.10.0)
20
20
  concurrent-ruby (~> 1.0)
21
- mini_portile2 (2.5.0)
22
- minitest (5.14.3)
23
- nokogiri (1.11.2)
24
- mini_portile2 (~> 2.5.0)
21
+ mini_portile2 (2.8.0)
22
+ minitest (5.15.0)
23
+ nokogiri (1.13.3)
24
+ mini_portile2 (~> 2.8.0)
25
25
  racc (~> 1.4)
26
- racc (1.5.2)
26
+ racc (1.6.0)
27
27
  rake (12.3.3)
28
- rspec (3.10.0)
29
- rspec-core (~> 3.10.0)
30
- rspec-expectations (~> 3.10.0)
31
- rspec-mocks (~> 3.10.0)
32
- rspec-core (3.10.1)
33
- rspec-support (~> 3.10.0)
34
- rspec-expectations (3.10.1)
28
+ rspec (3.11.0)
29
+ rspec-core (~> 3.11.0)
30
+ rspec-expectations (~> 3.11.0)
31
+ rspec-mocks (~> 3.11.0)
32
+ rspec-core (3.11.0)
33
+ rspec-support (~> 3.11.0)
34
+ rspec-expectations (3.11.0)
35
35
  diff-lcs (>= 1.2.0, < 2.0)
36
- rspec-support (~> 3.10.0)
37
- rspec-mocks (3.10.1)
36
+ rspec-support (~> 3.11.0)
37
+ rspec-mocks (3.11.0)
38
38
  diff-lcs (>= 1.2.0, < 2.0)
39
- rspec-support (~> 3.10.0)
40
- rspec-support (3.10.1)
39
+ rspec-support (~> 3.11.0)
40
+ rspec-support (3.11.0)
41
41
  tzinfo (2.0.4)
42
42
  concurrent-ruby (~> 1.0)
43
- zeitwerk (2.4.2)
43
+ zeitwerk (2.5.4)
44
44
 
45
45
  PLATFORMS
46
46
  ruby
@@ -51,4 +51,4 @@ DEPENDENCIES
51
51
  xml_data_extractor!
52
52
 
53
53
  BUNDLED WITH
54
- 2.1.4
54
+ 2.2.6
data/README.md CHANGED
@@ -300,6 +300,295 @@ schemas:
300
300
  }
301
301
  ```
302
302
 
303
+ ### link
304
+
305
+ This command is useful when the XML contains references to other nodes; it works like a SQL JOIN. The path must be an expression containing the `<link>` identifier, which will be replaced by the value fetched from the `link:` command.
306
+
307
+ Example:
308
+ ```yml
309
+ schemas:
310
+ bookings:
311
+ array_of: booking
312
+ date: booking_date
313
+ document: id
314
+ products:
315
+ array_of:
316
+ accomodation:
317
+ path: ../hotel[booking_id=<link>]/accomodation
318
+ link: id
319
+ ```
320
+ ```xml
321
+ <xml>
322
+ <booking>
323
+ <id>1</id>
324
+ <booking_date>2020-01-01</booking_date>
325
+ </booking>
326
+ <booking>
327
+ <id>2</id>
328
+ <booking_date>2020-01-02</booking_date>
329
+ </booking>
330
+ <hotel>
331
+ <booking_id>1</booking_id>
332
+ <accomodation>Standard</accomodation>
333
+ </hotel>
334
+ <hotel>
335
+ <booking_id>2</booking_id>
336
+ <accomodation>Premium</accomodation>
337
+ </hotel>
338
+ </xml>
339
+ ```
340
+ ```ruby
341
+ {
342
+ bookings: [
343
+ {
344
+ date: "2020-01-01",
345
+ document: "1",
346
+ products: [
347
+ { accomodation: "Standard" }
348
+ ]
349
+ },
350
+ {
351
+ date: "2020-01-02",
352
+ document: "2",
353
+ products: [
354
+ { accomodation: "Premium" }
355
+ ]
356
+ }
357
+ ]
358
+ }
359
+ ```
360
+
361
+ In this example, if `link` were not used to select only the hotel of each booking, the path would match both hotels for every booking, and instead of extracting a string with the accommodation it would extract an array with all the accommodations for each booking.
362
+
363
+ You can combine `link` with `array_of` if you want to search for a list of elements filtered by some field; just provide the `path` and the `link`:
364
+
365
+ ```yml
366
+ schemas:
367
+ bookings:
368
+ array_of: booking
369
+ date: date
370
+ document: id
371
+ products:
372
+ array_of:
373
+ path: ../products[booking_id=<link>]
374
+ link: id
375
+ ....
376
+ ```
377
+
378
+ ### uniq_by
379
+
380
+ Can only be used with **array_of**.
381
+
382
+ This functionality is useful when some XML nodes are duplicated and you want to extract data from the first occurrence only. It has a behavior similar to Ruby **uniq** method on arrays.
383
+ For each path generated from `array_of`, the value fetched using `uniq_by` will be checked against the generated collection and the path will be discarded if the value already exists.
384
+
385
+ ```yml
386
+ schemas:
387
+ bookings:
388
+ array_of:
389
+ path: booking
390
+ uniq_by: id
391
+ date: bdate
392
+ document: id
393
+ ```
394
+ ```xml
395
+ <xml>
396
+ <booking>
397
+ <id>1</id>
398
+ <bdate>2020-01-01</bdate>
399
+ </booking>
400
+ <booking>
401
+ <id>1</id>
402
+ <bdate>2020-01-01</bdate>
403
+ </booking>
404
+ </xml>
405
+ ```
406
+ ```ruby
407
+ {
408
+ bookings: [
409
+ {
410
+ date: "2020-01-01",
411
+ document: "1"
412
+ }
413
+ ]
414
+ }
415
+ ```
416
+
417
+ In this example, if we didn't use the `uniq_by` option, two elements with the same data would be extracted, like:
418
+
419
+ ```ruby
420
+ {
421
+ bookings: [
422
+ {
423
+ date: "2020-01-01",
424
+ document: "1"
425
+ },
426
+ {
427
+ date: "2020-01-01",
428
+ document: "1"
429
+ }
430
+ ]
431
+ }
432
+ ```
433
+
434
+ ### array_presence: first_only
435
+
436
+ The field that contains this property will only be added to the first item of the array.
437
+
438
+ Can only be used in fields that belong to a node of `array_of`.
439
+
440
+ ```yml
441
+ passengers:
442
+ array_of: bookings/booking/passengers/passenger
443
+ id:
444
+ path: document
445
+ modifier: to_s
446
+ name:
447
+ attr: [FirstName, LastName]
448
+ modifier:
449
+ - name: join
450
+ params: [" "]
451
+ tax_rav:
452
+ array_presence: first_only
453
+ path: ../rav
454
+ modifier: to_f
455
+ ```
456
+ ```xml
457
+ <bookings>
458
+ <booking>
459
+ <rav>150</rav>
460
+ <passengers>
461
+ <passenger>
462
+ <document>109.111.019-79</document>
463
+ <FirstName>Marcelo</FirstName>
464
+ <LastName>Lauxen</LastName>
465
+ </passenger>
466
+ <passenger>
467
+ <document>110.155.019-78</document>
468
+ <FirstName>Corona</FirstName>
469
+ <LastName>Virus</LastName>
470
+ </passenger>
471
+ </passengers>
472
+ </booking>
473
+ </bookings>
474
+ ```
475
+ ```ruby
476
+ {
477
+ bookings: [
478
+ {
479
+ passengers: [
480
+ {
481
+ id: "109.111.019-79",
482
+ name: "Marcelo Lauxen",
483
+ tax_rav: 150.00
484
+ },
485
+ {
486
+ id: "110.155.019-78",
487
+ name: "Corona Virus"
488
+ }
489
+ ]
490
+ }
491
+ ]
492
+ }
493
+ ```
494
+
495
+ In this example the field `tax_rav` was only included on the first passenger because this field has the `array_presence: first_only` property.
496
+
497
+ ### in_parent
498
+
499
+ This option allows you to navigate to a parent node of the current node.
500
+
501
+ ```yml
502
+ passengers:
503
+ array_of: bookings/booking/passengers/passenger
504
+ id:
505
+ path: document
506
+ modifier: to_s
507
+ bookings_id:
508
+ in_parent: bookings
509
+ path: bookings_id
510
+ ```
511
+ ```xml
512
+ <bookings>
513
+ <bookings_id>8888</bookings_id>
514
+ <booking>
515
+ <passengers>
516
+ <passenger>
517
+ <document>109.111.019-79</document>
518
+ </passenger>
519
+ <passenger>
520
+ <document>110.155.019-78</document>
521
+ </passenger>
522
+ </passengers>
523
+ </booking>
524
+ </bookings>
525
+ ```
526
+ ```ruby
527
+ {
528
+ bookings: [
529
+ {
530
+ passengers: [
531
+ {
532
+ id: "109.111.019-79",
533
+ bookings_id: 8888
534
+ },
535
+ {
536
+ id: "110.155.019-78",
537
+ bookings_id: 8888
538
+ }
539
+ ]
540
+ }
541
+ ]
542
+ }
543
+ ```
544
+
545
+ In this example the value of `bookings_id` will be extracted starting at the node provided in `in_parent` instead of the current node. It's possible to navigate to a parent node with `../` too (XPath provides this functionality), but with `in_parent` you only need to provide the name of the parent node: it will navigate up until that node is found, no matter how many levels up it is.
546
+
547
+ ### keep_if
548
+
549
+ This option keeps a block of the hash in the final result only if the condition matches; otherwise the block is removed.
550
+
551
+ ```yml
552
+ schemas:
553
+ dummy:
554
+ within: data
555
+ description: additional_desc
556
+ exchange: currency_info/value
557
+ price: price
558
+ payment:
559
+ type: payment_info/method
560
+ value: payment_info/price
561
+ keep_if: "'type' == 'invoice'"
562
+ ```
563
+ ```xml
564
+ <data>
565
+ <additional_desc>Keep walking</additional_desc>
566
+ <currency_info kind="USD">
567
+ <value>4.15</value>
568
+ </currency_info>
569
+ <price>55.09</price>
570
+ <payment_info>
571
+ <method>card</method>
572
+ <price>55.48</price>
573
+ <payment>
574
+ <installments>2</installments>
575
+ <card_number>333</card_number>
576
+ </payment>
577
+ </payment>
578
+ <data>
579
+ ```
580
+ ```ruby
581
+ {
582
+ dummy: {
583
+ description: "Keep walking",
584
+ exchange: "4.15",
585
+ price: "55.09"
586
+ }
587
+ }
588
+ ```
589
+
590
+ In this example the condition didn't match since the payment method was `card` instead of `invoice` and then the extracted payment hash was removed from the final result.
591
+
303
592
  ### Formatting:
304
593
 
305
594
  #### fixed
@@ -379,7 +668,7 @@ schemas:
379
668
  path: [firstname, lastname]
380
669
  modifier:
381
670
  - name: join
382
- params: [" "]
671
+ params: [" "]
383
672
  - downcase
384
673
  ```
385
674
  ```xml
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Extract
2
4
  class ArrayOf < Base
3
5
  def initialize(node, extractor, index = 0)
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Extract
2
4
  class ArrayValue < Base
3
5
  def value
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Extract
2
4
  class Base
3
5
  def initialize(node, extractor)
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Extract
2
4
  class Expression
3
5
  def initialize(expression, hash)
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Extract
2
4
  class HashBuilder < Base
3
5
  INTERNAL_FIELDS = %i[array_of keep_if within unescape].freeze
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Extract
2
4
  class StringValue < Base
3
5
  def value
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Extract
2
4
  class Unescape < Base
3
5
  def unescape!
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative "base"
2
4
  require_relative "array_value"
3
5
  require_relative "array_of"
@@ -25,7 +27,7 @@ module Extract
25
27
 
26
28
  def value_for_hash
27
29
  props = node.props
28
-
30
+
29
31
  Unescape.new(node, extractor).unescape! if props[:unescape]
30
32
 
31
33
  fixed_value = props[:fixed]
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Extract
2
4
  class Within < Base
3
5
  def value
data/lib/src/extractor.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "cgi"
2
4
  require "active_support/core_ext/string"
3
5
  require_relative "format/formatter"
@@ -37,7 +39,7 @@ class PathBuilder < Struct.new(:base, :parent, :tag, keyword_init: true)
37
39
  end
38
40
 
39
41
  def matching_tags?(item, tag)
40
- item.gsub(/\[\d\]/, "") == tag
42
+ item.gsub(/\[\d+\]/, "") == tag
41
43
  end
42
44
  end
43
45
 
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative "mapper"
2
4
  require_relative "modifier"
3
5
 
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Format
2
4
  class Mapper
3
5
  def initialize(yml)
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Format
2
4
  class Modifier
3
5
  def initialize(yml, helper)
data/lib/src/node.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class Node < Struct.new(:props, :path)
2
4
  def initialize(*)
3
5
  super
@@ -1,13 +1,13 @@
1
1
  Gem::Specification.new do |spec|
2
2
  spec.name = "xml_data_extractor"
3
- spec.version = "0.5.0"
3
+ spec.version = "0.6.0"
4
4
  spec.authors = ["Fernando Almeida"]
5
5
  spec.email = ["fernandoprsbr@gmail.com"]
6
6
 
7
7
  spec.summary = "Provides a simples DSL for extracting data from XML documents"
8
8
  spec.homepage = "https://github.com/monde-sistemas/xml_data_extractor"
9
9
  spec.license = "MIT"
10
- spec.required_ruby_version = Gem::Requirement.new(">= 2.3.0")
10
+ spec.required_ruby_version = Gem::Requirement.new(">= 2.5")
11
11
 
12
12
  spec.metadata["homepage_uri"] = spec.homepage
13
13
  spec.metadata["source_code_uri"] = spec.homepage
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: xml_data_extractor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Fernando Almeida
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-03-12 00:00:00.000000000 Z
11
+ date: 2022-04-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -73,13 +73,13 @@ executables: []
73
73
  extensions: []
74
74
  extra_rdoc_files: []
75
75
  files:
76
+ - ".editorconfig"
76
77
  - ".github/dependabot.yml"
77
78
  - ".github/workflows/ci.yml"
78
79
  - ".gitignore"
79
80
  - ".kodiak.toml"
80
81
  - ".rspec"
81
82
  - ".ruby-version"
82
- - ".travis.yml"
83
83
  - Gemfile
84
84
  - Gemfile.lock
85
85
  - LICENSE.txt
@@ -119,14 +119,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
119
119
  requirements:
120
120
  - - ">="
121
121
  - !ruby/object:Gem::Version
122
- version: 2.3.0
122
+ version: '2.5'
123
123
  required_rubygems_version: !ruby/object:Gem::Requirement
124
124
  requirements:
125
125
  - - ">="
126
126
  - !ruby/object:Gem::Version
127
127
  version: '0'
128
128
  requirements: []
129
- rubygems_version: 3.2.3
129
+ rubygems_version: 3.2.25
130
130
  signing_key:
131
131
  specification_version: 4
132
132
  summary: Provides a simples DSL for extracting data from XML documents
data/.travis.yml DELETED
@@ -1,6 +0,0 @@
1
- ---
2
- language: ruby
3
- cache: bundler
4
- rvm:
5
- - 2.6.6
6
- before_install: gem install bundler -v 2.1.4