xml_data_extractor 0.5.0 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b8705267e58445a552f2821a726e431ca93259e0782be80f2d47c2bcf1603aad
4
- data.tar.gz: bd59596a4a0d554b055920a09891efeecc8589532b7d02e9e7b4715c54294eee
3
+ metadata.gz: 334896bd178759618062d648f74af638a88fde5c5cccfaf255279589207670a6
4
+ data.tar.gz: 6b85212f452f62bfa75a97c66f76c889cc39382d726d26b93a05800eb69e6dbe
5
5
  SHA512:
6
- metadata.gz: 736fa4908c03918c71417b73e519fcfc658f65a1d7b4b2762ff00e73854e4f0a6f7495963ae68081403e52b50a26aae5dc9393dad66df9f366bd89c4e9af6ba9
7
- data.tar.gz: 4ae406899d4538f2f92ec470502b1905a80bbda13273521a2b8d0d94207401cd9c0495ff20bc4c69351d2f4dc34aa21b4606504f716e019148734fe3d45f4f49
6
+ metadata.gz: 6899c3dafed6462fcb816edbe341a33e7a7388b2f3ba2724d5c2e0bab190f7ce00256c8bda35fbeaed7eaeb58ccfad8c3597c94a1e332ef7e5c125efd6a50924
7
+ data.tar.gz: 1b0b37b90adba98c9b2085d6a300473dca39bcafd4c801c58a5877ae57ad518be8d6079d0c61725f73809779ea3ace7590e4258494e2d1e0a61b7e914f8e5f69
data/.editorconfig ADDED
@@ -0,0 +1,8 @@
1
+ [*]
2
+ end_of_line = lf
3
+
4
+ [*.{js,rb,yml,json,css,scss,html,erb,rake,slim}]
5
+ insert_final_newline = true
6
+ indent_style = space
7
+ indent_size = 2
8
+ trim_trailing_whitespace = true
@@ -1,6 +1,6 @@
1
1
  name: ci
2
- on:
3
- push:
2
+
3
+ on: [push, pull_request]
4
4
 
5
5
  jobs:
6
6
  qa:
@@ -8,7 +8,7 @@ jobs:
8
8
  runs-on: ubuntu-latest
9
9
  steps:
10
10
  - name: Checkout code
11
- uses: actions/checkout@v2.3.4
11
+ uses: actions/checkout@v3
12
12
 
13
13
  - name: Freeze autogenerated files
14
14
  run: |
@@ -18,7 +18,7 @@ jobs:
18
18
  uses: ruby/setup-ruby@v1
19
19
 
20
20
  - name: Cache Ruby Dependencies
21
- uses: actions/cache@v2.1.4
21
+ uses: actions/cache@v3
22
22
  with:
23
23
  path: vendor/bundle
24
24
  key: ${{ runner.os }}-gem-${{ hashFiles('.ruby-version') }}-${{ hashFiles('**/Gemfile.lock') }}
@@ -28,4 +28,4 @@ jobs:
28
28
 
29
29
  - name: Run tests
30
30
  run: |
31
- bin/rspec
31
+ bin/rspec
data/.gitignore CHANGED
@@ -6,6 +6,7 @@
6
6
  /pkg/
7
7
  /spec/reports/
8
8
  /tmp/
9
+ *.gem
9
10
 
10
11
  # rspec failure tracking
11
12
  .rspec_status
data/Gemfile.lock CHANGED
@@ -1,46 +1,46 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- xml_data_extractor (0.4.0)
4
+ xml_data_extractor (0.6.0)
5
5
  activesupport (~> 6.0)
6
6
  nokogiri (~> 1.0)
7
7
 
8
8
  GEM
9
9
  remote: https://rubygems.org/
10
10
  specs:
11
- activesupport (6.1.3)
11
+ activesupport (6.1.5)
12
12
  concurrent-ruby (~> 1.0, >= 1.0.2)
13
13
  i18n (>= 1.6, < 2)
14
14
  minitest (>= 5.1)
15
15
  tzinfo (~> 2.0)
16
16
  zeitwerk (~> 2.3)
17
- concurrent-ruby (1.1.8)
18
- diff-lcs (1.4.4)
19
- i18n (1.8.9)
17
+ concurrent-ruby (1.1.10)
18
+ diff-lcs (1.5.0)
19
+ i18n (1.10.0)
20
20
  concurrent-ruby (~> 1.0)
21
- mini_portile2 (2.5.0)
22
- minitest (5.14.3)
23
- nokogiri (1.11.2)
24
- mini_portile2 (~> 2.5.0)
21
+ mini_portile2 (2.8.0)
22
+ minitest (5.15.0)
23
+ nokogiri (1.13.3)
24
+ mini_portile2 (~> 2.8.0)
25
25
  racc (~> 1.4)
26
- racc (1.5.2)
26
+ racc (1.6.0)
27
27
  rake (12.3.3)
28
- rspec (3.10.0)
29
- rspec-core (~> 3.10.0)
30
- rspec-expectations (~> 3.10.0)
31
- rspec-mocks (~> 3.10.0)
32
- rspec-core (3.10.1)
33
- rspec-support (~> 3.10.0)
34
- rspec-expectations (3.10.1)
28
+ rspec (3.11.0)
29
+ rspec-core (~> 3.11.0)
30
+ rspec-expectations (~> 3.11.0)
31
+ rspec-mocks (~> 3.11.0)
32
+ rspec-core (3.11.0)
33
+ rspec-support (~> 3.11.0)
34
+ rspec-expectations (3.11.0)
35
35
  diff-lcs (>= 1.2.0, < 2.0)
36
- rspec-support (~> 3.10.0)
37
- rspec-mocks (3.10.1)
36
+ rspec-support (~> 3.11.0)
37
+ rspec-mocks (3.11.0)
38
38
  diff-lcs (>= 1.2.0, < 2.0)
39
- rspec-support (~> 3.10.0)
40
- rspec-support (3.10.1)
39
+ rspec-support (~> 3.11.0)
40
+ rspec-support (3.11.0)
41
41
  tzinfo (2.0.4)
42
42
  concurrent-ruby (~> 1.0)
43
- zeitwerk (2.4.2)
43
+ zeitwerk (2.5.4)
44
44
 
45
45
  PLATFORMS
46
46
  ruby
@@ -51,4 +51,4 @@ DEPENDENCIES
51
51
  xml_data_extractor!
52
52
 
53
53
  BUNDLED WITH
54
- 2.1.4
54
+ 2.2.6
data/README.md CHANGED
@@ -300,6 +300,295 @@ schemas:
300
300
  }
301
301
  ```
302
302
 
303
+ ### link
304
+
305
+ This command is useful when the XML contains references to other nodes; it works like a SQL JOIN. The path must be an expression containing the `<link>` identifier, which will be replaced by the value fetched from the `link:` command.
306
+
307
+ Example:
308
+ ```yml
309
+ schemas:
310
+ bookings:
311
+ array_of: booking
312
+ date: booking_date
313
+ document: id
314
+ products:
315
+ array_of:
316
+ accomodation:
317
+ path: ../hotel[booking_id=<link>]/accomodation
318
+ link: id
319
+ ```
320
+ ```xml
321
+ <xml>
322
+ <booking>
323
+ <id>1</id>
324
+ <booking_date>2020-01-01</booking_date>
325
+ </booking>
326
+ <booking>
327
+ <id>2</id>
328
+ <booking_date>2020-01-02</booking_date>
329
+ </booking>
330
+ <hotel>
331
+ <booking_id>1</booking_id>
332
+ <accomodation>Standard</accomodation>
333
+ </hotel>
334
+ <hotel>
335
+ <booking_id>2</booking_id>
336
+ <accomodation>Premium</accomodation>
337
+ </hotel>
338
+ </xml>
339
+ ```
340
+ ```ruby
341
+ {
342
+ bookings: [
343
+ {
344
+ date: "2020-01-01",
345
+ document: "1",
346
+ products: [
347
+ { accomodation: "Standard" }
348
+ ]
349
+ },
350
+ {
351
+ date: "2020-01-02",
352
+ document: "2",
353
+ products: [
354
+ { accomodation: "Premium" }
355
+ ]
356
+ }
357
+ ]
358
+ }
359
+ ```
360
+
361
+ In this example, if `link` were not used to select only the hotel of each booking, the path would have returned two accomodations for each booking; instead of extracting a string with the accomodation, it would extract an array with all the accomodations for each booking.
362
+
363
+ You can combine `link` with `array_of` if you want to search for a list of elements filtered by some field; just provide the `path` and the `link`:
364
+
365
+ ```yml
366
+ schemas:
367
+ bookings:
368
+ array_of: booking
369
+ date: date
370
+ document: id
371
+ products:
372
+ array_of:
373
+ path: ../products[booking_id=<link>]
374
+ link: id
375
+ ....
376
+ ```
377
+
378
+ ### uniq_by
379
+
380
+ Can only be used with **array_of**.
381
+
382
+ This functionality is useful when some XML nodes are duplicated and you want to extract data from the first occurrence only. It behaves similarly to Ruby's **uniq** method on arrays.
383
+ For each path generated from `array_of`, the value fetched using `uniq_by` will be checked against the generated collection and the path will be discarded if the value already exists.
384
+
385
+ ```yml
386
+ schemas:
387
+ bookings:
388
+ array_of:
389
+ path: booking
390
+ uniq_by: id
391
+ date: bdate
392
+ document: id
393
+ ```
394
+ ```xml
395
+ <xml>
396
+ <booking>
397
+ <id>1</id>
398
+ <bdate>2020-01-01</bdate>
399
+ </booking>
400
+ <booking>
401
+ <id>1</id>
402
+ <bdate>2020-01-01</bdate>
403
+ </booking>
404
+ </xml>
405
+ ```
406
+ ```ruby
407
+ {
408
+ bookings: [
409
+ {
410
+ date: "2020-01-01",
411
+ document: "1"
412
+ }
413
+ ]
414
+ }
415
+ ```
416
+
417
+ In this example, if the `uniq_by` option were not used, two elements with the same data would be extracted, like:
418
+
419
+ ```ruby
420
+ {
421
+ bookings: [
422
+ {
423
+ date: "2020-01-01",
424
+ document: "1"
425
+ },
426
+ {
427
+ date: "2020-01-01",
428
+ document: "1"
429
+ }
430
+ ]
431
+ }
432
+ ```
433
+
434
+ ### array_presence: first_only
435
+
436
+ The field that contains this property will be added only to the first item of the array.
437
+
438
+ Can only be used in fields that belong to a node of `array_of`.
439
+
440
+ ```yml
441
+ passengers:
442
+ array_of: bookings/booking/passengers/passenger
443
+ id:
444
+ path: document
445
+ modifier: to_s
446
+ name:
447
+ attr: [FirstName, LastName]
448
+ modifier:
449
+ - name: join
450
+ params: [" "]
451
+ rav_tax:
452
+ array_presence: first_only
453
+ path: ../rav
454
+ modifier: to_f
455
+ ```
456
+ ```xml
457
+ <bookings>
458
+ <booking>
459
+ <rav>150</rav>
460
+ <passengers>
461
+ <passenger>
462
+ <document>109.111.019-79</document>
463
+ <FirstName>Marcelo</FirstName>
464
+ <LastName>Lauxen</LastName>
465
+ </passenger>
466
+ <passenger>
467
+ <document>110.155.019-78</document>
468
+ <FirstName>Corona</FirstName>
469
+ <LastName>Virus</LastName>
470
+ </passenger>
471
+ </passengers>
472
+ </booking>
473
+ </bookings>
474
+ ```
475
+ ```ruby
476
+ {
477
+ bookings: [
478
+ {
479
+ passengers: [
480
+ {
481
+ id: "109.111.019-79",
482
+ name: "Marcelo Lauxen",
483
+ rav_tax: 150.00
484
+ },
485
+ {
486
+ id: "110.155.019-78",
487
+ name: "Corona Virus"
488
+ }
489
+ ]
490
+ }
491
+ ]
492
+ }
493
+ ```
494
+
495
+ In this example the field `rav_tax` was only included on the first passenger because this field has the `array_presence: first_only` property.
496
+
497
+ ### in_parent
498
+
499
+ This option allows you to navigate to a parent node of the current node.
500
+
501
+ ```yml
502
+ passengers:
503
+ array_of: bookings/booking/passengers/passenger
504
+ id:
505
+ path: document
506
+ modifier: to_s
507
+ bookings_id:
508
+ in_parent: bookings
509
+ path: bookings_id
510
+ ```
511
+ ```xml
512
+ <bookings>
513
+ <bookings_id>8888</bookings_id>
514
+ <booking>
515
+ <passengers>
516
+ <passenger>
517
+ <document>109.111.019-79</document>
518
+ </passenger>
519
+ <passenger>
520
+ <document>110.155.019-78</document>
521
+ </passenger>
522
+ </passengers>
523
+ </booking>
524
+ </bookings>
525
+ ```
526
+ ```ruby
527
+ {
528
+ bookings: [
529
+ {
530
+ passengers: [
531
+ {
532
+ id: "109.111.019-79",
533
+ bookings_id: 8888
534
+ },
535
+ {
536
+ id: "110.155.019-78",
537
+ bookings_id: 8888
538
+ }
539
+ ]
540
+ }
541
+ ]
542
+ }
543
+ ```
544
+
545
+ In this example the value of `bookings_id` will be extracted starting at the node provided in `in_parent` instead of the current node. It's possible to navigate to a parent node with `../` too (XPath provides this functionality), but with `in_parent` you only need to provide the name of the parent node: it will navigate up until that node is found, no matter how many levels.
546
+
547
+ ### keep_if
548
+
549
+ This option allows you to keep a block of the hash in the final result only if the condition matches.
550
+
551
+ ```yml
552
+ schemas:
553
+ dummy:
554
+ within: data
555
+ description: additional_desc
556
+ exchange: currency_info/value
557
+ price: price
558
+ payment:
559
+ type: payment_info/method
560
+ value: payment_info/price
561
+ keep_if: "'type' == 'invoice'"
562
+ ```
563
+ ```xml
564
+ <data>
565
+ <additional_desc>Keep walking</additional_desc>
566
+ <currency_info kind="USD">
567
+ <value>4.15</value>
568
+ </currency_info>
569
+ <price>55.09</price>
570
+ <payment_info>
571
+ <method>card</method>
572
+ <price>55.48</price>
573
+ <payment>
574
+ <installments>2</installments>
575
+ <card_number>333</card_number>
576
+ </payment>
577
+ </payment_info>
578
+ </data>
579
+ ```
580
+ ```ruby
581
+ {
582
+ dummy: {
583
+ description: "Keep walking",
584
+ exchange: "4.15",
585
+ price: "55.09"
586
+ }
587
+ }
588
+ ```
589
+
590
+ In this example the condition didn't match, since the payment method was `card` instead of `invoice`, so the extracted payment hash was removed from the final result.
591
+
303
592
  ### Formatting:
304
593
 
305
594
  #### fixed
@@ -379,7 +668,7 @@ schemas:
379
668
  path: [firstname, lastname]
380
669
  modifier:
381
670
  - name: join
382
- params: [" "]
671
+ params: [" "]
383
672
  - downcase
384
673
  ```
385
674
  ```xml
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Extract
2
4
  class ArrayOf < Base
3
5
  def initialize(node, extractor, index = 0)
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Extract
2
4
  class ArrayValue < Base
3
5
  def value
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Extract
2
4
  class Base
3
5
  def initialize(node, extractor)
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Extract
2
4
  class Expression
3
5
  def initialize(expression, hash)
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Extract
2
4
  class HashBuilder < Base
3
5
  INTERNAL_FIELDS = %i[array_of keep_if within unescape].freeze
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Extract
2
4
  class StringValue < Base
3
5
  def value
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Extract
2
4
  class Unescape < Base
3
5
  def unescape!
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative "base"
2
4
  require_relative "array_value"
3
5
  require_relative "array_of"
@@ -25,7 +27,7 @@ module Extract
25
27
 
26
28
  def value_for_hash
27
29
  props = node.props
28
-
30
+
29
31
  Unescape.new(node, extractor).unescape! if props[:unescape]
30
32
 
31
33
  fixed_value = props[:fixed]
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Extract
2
4
  class Within < Base
3
5
  def value
data/lib/src/extractor.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "cgi"
2
4
  require "active_support/core_ext/string"
3
5
  require_relative "format/formatter"
@@ -37,7 +39,7 @@ class PathBuilder < Struct.new(:base, :parent, :tag, keyword_init: true)
37
39
  end
38
40
 
39
41
  def matching_tags?(item, tag)
40
- item.gsub(/\[\d\]/, "") == tag
42
+ item.gsub(/\[\d+\]/, "") == tag
41
43
  end
42
44
  end
43
45
 
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative "mapper"
2
4
  require_relative "modifier"
3
5
 
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Format
2
4
  class Mapper
3
5
  def initialize(yml)
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Format
2
4
  class Modifier
3
5
  def initialize(yml, helper)
data/lib/src/node.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class Node < Struct.new(:props, :path)
2
4
  def initialize(*)
3
5
  super
@@ -1,13 +1,13 @@
1
1
  Gem::Specification.new do |spec|
2
2
  spec.name = "xml_data_extractor"
3
- spec.version = "0.5.0"
3
+ spec.version = "0.6.0"
4
4
  spec.authors = ["Fernando Almeida"]
5
5
  spec.email = ["fernandoprsbr@gmail.com"]
6
6
 
7
7
  spec.summary = "Provides a simples DSL for extracting data from XML documents"
8
8
  spec.homepage = "https://github.com/monde-sistemas/xml_data_extractor"
9
9
  spec.license = "MIT"
10
- spec.required_ruby_version = Gem::Requirement.new(">= 2.3.0")
10
+ spec.required_ruby_version = Gem::Requirement.new(">= 2.5")
11
11
 
12
12
  spec.metadata["homepage_uri"] = spec.homepage
13
13
  spec.metadata["source_code_uri"] = spec.homepage
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: xml_data_extractor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Fernando Almeida
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-03-12 00:00:00.000000000 Z
11
+ date: 2022-04-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -73,13 +73,13 @@ executables: []
73
73
  extensions: []
74
74
  extra_rdoc_files: []
75
75
  files:
76
+ - ".editorconfig"
76
77
  - ".github/dependabot.yml"
77
78
  - ".github/workflows/ci.yml"
78
79
  - ".gitignore"
79
80
  - ".kodiak.toml"
80
81
  - ".rspec"
81
82
  - ".ruby-version"
82
- - ".travis.yml"
83
83
  - Gemfile
84
84
  - Gemfile.lock
85
85
  - LICENSE.txt
@@ -119,14 +119,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
119
119
  requirements:
120
120
  - - ">="
121
121
  - !ruby/object:Gem::Version
122
- version: 2.3.0
122
+ version: '2.5'
123
123
  required_rubygems_version: !ruby/object:Gem::Requirement
124
124
  requirements:
125
125
  - - ">="
126
126
  - !ruby/object:Gem::Version
127
127
  version: '0'
128
128
  requirements: []
129
- rubygems_version: 3.2.3
129
+ rubygems_version: 3.2.25
130
130
  signing_key:
131
131
  specification_version: 4
132
132
  summary: Provides a simples DSL for extracting data from XML documents
data/.travis.yml DELETED
@@ -1,6 +0,0 @@
1
- ---
2
- language: ruby
3
- cache: bundler
4
- rvm:
5
- - 2.6.6
6
- before_install: gem install bundler -v 2.1.4