xml_data_extractor 0.3.0 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: aa8684820cc0394f1d83bc06c5eaa3378e6a7cc12e33c31b166018edea86d714
4
- data.tar.gz: 57ef32a54ff735afd93e25836b44773a8d79ad922fc3837437cb10beead43e1f
3
+ metadata.gz: 334896bd178759618062d648f74af638a88fde5c5cccfaf255279589207670a6
4
+ data.tar.gz: 6b85212f452f62bfa75a97c66f76c889cc39382d726d26b93a05800eb69e6dbe
5
5
  SHA512:
6
- metadata.gz: 043ab0237d908959e98ebf371d36fc643c85d3df4fc46c213b2f8ff6e8ed1118b373c27b80685e4f4c486667d27b73476d44368aacec9e2a186ae24e0e9d8dcc
7
- data.tar.gz: 52baa3e42b3d65999024f89949152f2f98cf6049635aa37afc204ace50e8dbe75b7e40bcbf467c87309c487d50f1b1bf6e9e4849caa611fe290c218adee66685
6
+ metadata.gz: 6899c3dafed6462fcb816edbe341a33e7a7388b2f3ba2724d5c2e0bab190f7ce00256c8bda35fbeaed7eaeb58ccfad8c3597c94a1e332ef7e5c125efd6a50924
7
+ data.tar.gz: 1b0b37b90adba98c9b2085d6a300473dca39bcafd4c801c58a5877ae57ad518be8d6079d0c61725f73809779ea3ace7590e4258494e2d1e0a61b7e914f8e5f69
data/.editorconfig ADDED
@@ -0,0 +1,8 @@
1
+ [*]
2
+ end_of_line = lf
3
+
4
+ [*.{js,rb,yml,json,css,scss,html,erb,rake,slim}]
5
+ insert_final_newline = true
6
+ indent_style = space
7
+ indent_size = 2
8
+ trim_trailing_whitespace = true
@@ -0,0 +1,25 @@
1
+ version: 2
2
+ updates:
3
+ - package-ecosystem: bundler
4
+ directory: "/"
5
+ schedule:
6
+ interval: daily
7
+ time: "08:00"
8
+ timezone: America/Sao_Paulo
9
+ open-pull-requests-limit: 10
10
+ versioning-strategy: lockfile-only
11
+ labels:
12
+ - dependencies
13
+ - ruby
14
+ - automerge
15
+ - package-ecosystem: "github-actions"
16
+ directory: "/"
17
+ schedule:
18
+ interval: daily
19
+ time: "08:00"
20
+ timezone: America/Sao_Paulo
21
+ open-pull-requests-limit: 10
22
+ labels:
23
+ - dependencies
24
+ - github-actions
25
+ - automerge
@@ -0,0 +1,31 @@
1
+ name: ci
2
+
3
+ on: [push, pull_request]
4
+
5
+ jobs:
6
+ qa:
7
+ timeout-minutes: 5
8
+ runs-on: ubuntu-latest
9
+ steps:
10
+ - name: Checkout code
11
+ uses: actions/checkout@v3
12
+
13
+ - name: Freeze autogenerated files
14
+ run: |
15
+ chmod 0444 Gemfile.lock
16
+
17
+ - name: Setup Ruby
18
+ uses: ruby/setup-ruby@v1
19
+
20
+ - name: Cache Ruby Dependencies
21
+ uses: actions/cache@v3
22
+ with:
23
+ path: vendor/bundle
24
+ key: ${{ runner.os }}-gem-${{ hashFiles('.ruby-version') }}-${{ hashFiles('**/Gemfile.lock') }}
25
+
26
+ - name: Setup project
27
+ run: bin/setup
28
+
29
+ - name: Run tests
30
+ run: |
31
+ bin/rspec
data/.gitignore CHANGED
@@ -6,6 +6,7 @@
6
6
  /pkg/
7
7
  /spec/reports/
8
8
  /tmp/
9
+ *.gem
9
10
 
10
11
  # rspec failure tracking
11
12
  .rspec_status
data/.kodiak.toml ADDED
@@ -0,0 +1,14 @@
1
+ # https://github.com/chdsbd/kodiak
2
+ version = 1
3
+
4
+ [approve]
5
+ auto_approve_usernames = ["dependabot"]
6
+
7
+ [merge]
8
+ method = "squash"
9
+ delete_branch_on_merge = true
10
+
11
+ [merge.message]
12
+ title = "pull_request_title"
13
+ include_pr_number = true
14
+ body = "pull_request_body"
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 2.7.2
data/Gemfile CHANGED
@@ -2,6 +2,3 @@ source "https://rubygems.org"
2
2
 
3
3
  # Specify your gem's dependencies in xml_data_extractor.gemspec
4
4
  gemspec
5
-
6
- gem "rake", "~> 12.0"
7
- gem "rspec", "~> 3.0"
data/Gemfile.lock CHANGED
@@ -1,45 +1,46 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- xml_data_extractor (0.3.0)
4
+ xml_data_extractor (0.6.0)
5
5
  activesupport (~> 6.0)
6
6
  nokogiri (~> 1.0)
7
7
 
8
8
  GEM
9
9
  remote: https://rubygems.org/
10
10
  specs:
11
- activesupport (6.0.3.4)
11
+ activesupport (6.1.5)
12
12
  concurrent-ruby (~> 1.0, >= 1.0.2)
13
- i18n (>= 0.7, < 2)
14
- minitest (~> 5.1)
15
- tzinfo (~> 1.1)
16
- zeitwerk (~> 2.2, >= 2.2.2)
17
- concurrent-ruby (1.1.7)
18
- diff-lcs (1.3)
19
- i18n (1.8.5)
13
+ i18n (>= 1.6, < 2)
14
+ minitest (>= 5.1)
15
+ tzinfo (~> 2.0)
16
+ zeitwerk (~> 2.3)
17
+ concurrent-ruby (1.1.10)
18
+ diff-lcs (1.5.0)
19
+ i18n (1.10.0)
20
20
  concurrent-ruby (~> 1.0)
21
- mini_portile2 (2.4.0)
22
- minitest (5.14.2)
23
- nokogiri (1.10.10)
24
- mini_portile2 (~> 2.4.0)
21
+ mini_portile2 (2.8.0)
22
+ minitest (5.15.0)
23
+ nokogiri (1.13.3)
24
+ mini_portile2 (~> 2.8.0)
25
+ racc (~> 1.4)
26
+ racc (1.6.0)
25
27
  rake (12.3.3)
26
- rspec (3.9.0)
27
- rspec-core (~> 3.9.0)
28
- rspec-expectations (~> 3.9.0)
29
- rspec-mocks (~> 3.9.0)
30
- rspec-core (3.9.2)
31
- rspec-support (~> 3.9.3)
32
- rspec-expectations (3.9.2)
28
+ rspec (3.11.0)
29
+ rspec-core (~> 3.11.0)
30
+ rspec-expectations (~> 3.11.0)
31
+ rspec-mocks (~> 3.11.0)
32
+ rspec-core (3.11.0)
33
+ rspec-support (~> 3.11.0)
34
+ rspec-expectations (3.11.0)
33
35
  diff-lcs (>= 1.2.0, < 2.0)
34
- rspec-support (~> 3.9.0)
35
- rspec-mocks (3.9.1)
36
+ rspec-support (~> 3.11.0)
37
+ rspec-mocks (3.11.0)
36
38
  diff-lcs (>= 1.2.0, < 2.0)
37
- rspec-support (~> 3.9.0)
38
- rspec-support (3.9.3)
39
- thread_safe (0.3.6)
40
- tzinfo (1.2.7)
41
- thread_safe (~> 0.1)
42
- zeitwerk (2.4.1)
39
+ rspec-support (~> 3.11.0)
40
+ rspec-support (3.11.0)
41
+ tzinfo (2.0.4)
42
+ concurrent-ruby (~> 1.0)
43
+ zeitwerk (2.5.4)
43
44
 
44
45
  PLATFORMS
45
46
  ruby
@@ -50,4 +51,4 @@ DEPENDENCIES
50
51
  xml_data_extractor!
51
52
 
52
53
  BUNDLED WITH
53
- 2.1.4
54
+ 2.2.6
data/README.md CHANGED
@@ -171,7 +171,6 @@ schemas:
171
171
  within: info/movie_data
172
172
  title: original_title
173
173
  actor: main_actor
174
-
175
174
  ```
176
175
  ```xml
177
176
  <xml>
@@ -187,6 +186,42 @@ schemas:
187
186
  { movie: { title: "The Irishman", actor: "Robert De Niro" } }
188
187
  ```
189
188
 
189
+ #### unescape
190
+
191
+ This option is useful when you have XML or HTML embedded (escaped) inside a tag, as in CDATA elements, and you need to unescape it first in order to parse its content:
192
+
193
+ ```yml
194
+ schemas:
195
+ movie:
196
+ unescape: response
197
+ title: response/original_title
198
+ actor: response/main_actor
199
+
200
+ ```
201
+
202
+ ```xml
203
+ <xml>
204
+ <response>
205
+ &lt;original_title&gt;The Irishman&lt;/original_title&gt;&lt;main_actor&gt;Robert De Niro&lt;/main_actor&gt;
206
+ </response>
207
+ </xml>
208
+ ```
209
+
210
+ This XML will be turned into this one during the parsing:
211
+
212
+ ```xml
213
+ <xml>
214
+ <response>
215
+ <original_title>The Irishman</original_title>
216
+ <main_actor>Robert De Niro</main_actor>
217
+ </response>
218
+ </xml>
219
+ ```
220
+
221
+ ```ruby
222
+ { movie: { title: "The Irishman", actor: "Robert De Niro" } }
223
+ ```
224
+
190
225
  #### array_of
191
226
 
192
227
  Defines the path to a XML collection, which will be looped generating an array of hashes:
@@ -265,6 +300,295 @@ schemas:
265
300
  }
266
301
  ```
267
302
 
303
+ ### link
304
+
305
+ This command is useful when the XML contains references to other nodes; it works like a SQL JOIN. The path must be an expression containing the `<link>` identifier, which will be replaced by the value fetched from the `link:` command.
306
+
307
+ Example:
308
+ ```yml
309
+ schemas:
310
+ bookings:
311
+ array_of: booking
312
+ date: booking_date
313
+ document: id
314
+ products:
315
+ array_of:
316
+ accomodation:
317
+ path: ../hotel[booking_id=<link>]/accomodation
318
+ link: id
319
+ ```
320
+ ```xml
321
+ <xml>
322
+ <booking>
323
+ <id>1</id>
324
+ <booking_date>2020-01-01</booking_date>
325
+ </booking>
326
+ <booking>
327
+ <id>2</id>
328
+ <booking_date>2020-01-02</booking_date>
329
+ </booking>
330
+ <hotel>
331
+ <booking_id>1</booking_id>
332
+ <accomodation>Standard</accomodation>
333
+ </hotel>
334
+ <hotel>
335
+ <booking_id>2</booking_id>
336
+ <accomodation>Premium</accomodation>
337
+ </hotel>
338
+ </xml>
339
+ ```
340
+ ```ruby
341
+ {
342
+ bookings: [
343
+ {
344
+ date: "2020-01-01",
345
+ document: "1",
346
+ products: [
347
+ { accomodation: "Standard" }
348
+ ]
349
+ },
350
+ {
351
+ date: "2020-01-02",
352
+ document: "2",
353
+ products: [
354
+ { accomodation: "Premium" }
355
+ ]
356
+ }
357
+ ]
358
+ }
359
+ ```
360
+
361
+ In this example, if we didn't use `link` to select only the hotel of each booking, both accomodations would be returned for each booking: instead of extracting a string with the booking's accomodation, it would extract an array with all the accomodations.
362
+
363
+ You can combine `link` with `array_of` if you want to search for a list of elements filtered by some field; just provide the `path` and the `link`:
364
+
365
+ ```yml
366
+ schemas:
367
+ bookings:
368
+ array_of: booking
369
+ date: date
370
+ document: id
371
+ products:
372
+ array_of:
373
+ path: ../products[booking_id=<link>]
374
+ link: id
375
+ ....
376
+ ```
377
+
378
+ ### uniq_by
379
+
380
+ Can only be used with **array_of**.
381
+
382
+ This functionality is useful when some XML nodes are duplicated and you want to extract data from the first occurrence only. It has a behavior similar to Ruby **uniq** method on arrays.
383
+ For each path generated from `array_of`, the value fetched using `uniq_by` will be checked against the generated collection and the path will be discarded if the value already exists.
384
+
385
+ ```yml
386
+ schemas:
387
+ bookings:
388
+ array_of:
389
+ path: booking
390
+ uniq_by: id
391
+ date: bdate
392
+ document: id
393
+ ```
394
+ ```xml
395
+ <xml>
396
+ <booking>
397
+ <id>1</id>
398
+ <bdate>2020-01-01</bdate>
399
+ </booking>
400
+ <booking>
401
+ <id>1</id>
402
+ <bdate>2020-01-01</bdate>
403
+ </booking>
404
+ </xml>
405
+ ```
406
+ ```ruby
407
+ {
408
+ bookings: [
409
+ {
410
+ date: "2020-01-01",
411
+ document: "1"
412
+ }
413
+ ]
414
+ }
415
+ ```
416
+
417
+ In this example, if we didn't use `uniq_by`, two elements with the same data would be extracted, like:
418
+
419
+ ```ruby
420
+ {
421
+ bookings: [
422
+ {
423
+ date: "2020-01-01",
424
+ document: "1"
425
+ },
426
+ {
427
+ date: "2020-01-01",
428
+ document: "1"
429
+ }
430
+ ]
431
+ }
432
+ ```
433
+
434
+ ### array_presence: first_only
435
+
436
+ A field with this property will only be added to the first item of the array.
437
+
438
+ Can only be used in fields that belong to a node of `array_of`.
439
+
440
+ ```yml
441
+ passengers:
442
+ array_of: bookings/booking/passengers/passenger
443
+ id:
444
+ path: document
445
+ modifier: to_s
446
+ name:
447
+ attr: [FirstName, LastName]
448
+ modifier:
449
+ - name: join
450
+ params: [" "]
451
+ tax_rav:
452
+ array_presence: first_only
453
+ path: ../rav
454
+ modifier: to_f
455
+ ```
456
+ ```xml
457
+ <bookings>
458
+ <booking>
459
+ <rav>150</rav>
460
+ <passengers>
461
+ <passenger>
462
+ <document>109.111.019-79</document>
463
+ <FirstName>Marcelo</FirstName>
464
+ <LastName>Lauxen</LastName>
465
+ </passenger>
466
+ <passenger>
467
+ <document>110.155.019-78</document>
468
+ <FirstName>Corona</FirstName>
469
+ <LastName>Virus</LastName>
470
+ </passenger>
471
+ </passengers>
472
+ </booking>
473
+ </bookings>
474
+ ```
475
+ ```ruby
476
+ {
477
+ bookings: [
478
+ {
479
+ passengers: [
480
+ {
481
+ id: "109.111.019-79",
482
+ name: "Marcelo Lauxen",
483
+ tax_rav: 150.00
484
+ },
485
+ {
486
+ id: "110.155.019-78",
487
+ name: "Corona Virus"
488
+ }
489
+ ]
490
+ }
491
+ ]
492
+ }
493
+ ```
494
+
495
+ In this example the field `tax_rav` was only included on the first passenger because this field has the `array_presence: first_only` property.
496
+
497
+ ### in_parent
498
+
499
+ This option allows you to navigate to a parent node of the current node.
500
+
501
+ ```yml
502
+ passengers:
503
+ array_of: bookings/booking/passengers/passenger
504
+ id:
505
+ path: document
506
+ modifier: to_s
507
+ bookings_id:
508
+ in_parent: bookings
509
+ path: bookings_id
510
+ ```
511
+ ```xml
512
+ <bookings>
513
+ <bookings_id>8888</bookings_id>
514
+ <booking>
515
+ <passengers>
516
+ <passenger>
517
+ <document>109.111.019-79</document>
518
+ </passenger>
519
+ <passenger>
520
+ <document>110.155.019-78</document>
521
+ </passenger>
522
+ </passengers>
523
+ </booking>
524
+ </bookings>
525
+ ```
526
+ ```ruby
527
+ {
528
+ bookings: [
529
+ {
530
+ passengers: [
531
+ {
532
+ id: "109.111.019-79",
533
+ bookings_id: 8888
534
+ },
535
+ {
536
+ id: "110.155.019-78",
537
+ bookings_id: 8888
538
+ }
539
+ ]
540
+ }
541
+ ]
542
+ }
543
+ ```
544
+
545
+ In this example the value of `bookings_id` will be extracted starting at the node provided in `in_parent` instead of the current node. It's possible to navigate to a parent node with `../` too (xpath provides this functionality), but using `in_parent` you just need to provide the name of the parent node, it will navigate up until the parent node is found, no matter how many levels.
546
+
547
+ ### keep_if
548
+
549
+ This option keeps a block of the hash in the final result only if the given condition matches; otherwise the block is removed.
550
+
551
+ ```yml
552
+ schemas:
553
+ dummy:
554
+ within: data
555
+ description: additional_desc
556
+ exchange: currency_info/value
557
+ price: price
558
+ payment:
559
+ type: payment_info/method
560
+ value: payment_info/price
561
+ keep_if: "'type' == 'invoice'"
562
+ ```
563
+ ```xml
564
+ <data>
565
+ <additional_desc>Keep walking</additional_desc>
566
+ <currency_info kind="USD">
567
+ <value>4.15</value>
568
+ </currency_info>
569
+ <price>55.09</price>
570
+ <payment_info>
571
+ <method>card</method>
572
+ <price>55.48</price>
573
+ <payment>
574
+ <installments>2</installments>
575
+ <card_number>333</card_number>
576
+ </payment>
577
+ </payment_info>
578
+ </data>
579
+ ```
580
+ ```ruby
581
+ {
582
+ dummy: {
583
+ description: "Keep walking",
584
+ exchange: "4.15",
585
+ price: "55.09"
586
+ }
587
+ }
588
+ ```
589
+
590
+ In this example the condition didn't match since the payment method was `card` instead of `invoice` and then the extracted payment hash was removed from the final result.
591
+
268
592
  ### Formatting:
269
593
 
270
594
  #### fixed
@@ -344,7 +668,7 @@ schemas:
344
668
  path: [firstname, lastname]
345
669
  modifier:
346
670
  - name: join
347
- params: [" "]
671
+ params: [" "]
348
672
  - downcase
349
673
  ```
350
674
  ```xml
data/bin/rspec ADDED
@@ -0,0 +1,29 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ #
5
+ # This file was generated by Bundler.
6
+ #
7
+ # The application 'rspec' is installed as part of a gem, and
8
+ # this file is here to facilitate running it.
9
+ #
10
+
11
+ require "pathname"
12
+ ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
13
+ Pathname.new(__FILE__).realpath)
14
+
15
+ bundle_binstub = File.expand_path("../bundle", __FILE__)
16
+
17
+ if File.file?(bundle_binstub)
18
+ if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
19
+ load(bundle_binstub)
20
+ else
21
+ abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
22
+ Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
23
+ end
24
+ end
25
+
26
+ require "rubygems"
27
+ require "bundler/setup"
28
+
29
+ load Gem.bin_path("rspec-core", "rspec")
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Extract
2
4
  class ArrayOf < Base
3
5
  def initialize(node, extractor, index = 0)
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Extract
2
4
  class ArrayValue < Base
3
5
  def value
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Extract
2
4
  class Base
3
5
  def initialize(node, extractor)
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Extract
2
4
  class Expression
3
5
  def initialize(expression, hash)
@@ -6,9 +8,8 @@ module Extract
6
8
  end
7
9
 
8
10
  def evaluate
9
- field_name = expression.split.first.parameterize
10
- field_value = hash[field_name.to_sym]
11
- condition = expression.gsub(field_name, field_value.to_s)
11
+ keys = Regexp.union(hash.keys.map(&:to_s))
12
+ condition = expression.gsub(keys, hash.stringify_keys)
12
13
 
13
14
  eval(condition)
14
15
  end
@@ -1,6 +1,8 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Extract
2
4
  class HashBuilder < Base
3
- INTERNAL_FIELDS = %i[array_of keep_if within].freeze
5
+ INTERNAL_FIELDS = %i[array_of keep_if within unescape].freeze
4
6
 
5
7
  def value(index = 0)
6
8
  path, props = node.to_h.values_at(:path, :props)
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Extract
2
4
  class StringValue < Base
3
5
  def value
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Extract
4
+ class Unescape < Base
5
+ def unescape!
6
+ unescape_tag = node.props[:unescape]
7
+
8
+ paths_to_unescape = extractor.paths_of(node.path, unescape_tag)
9
+ return if paths_to_unescape.empty?
10
+
11
+ paths_to_unescape.each { |path| extractor.unescape!(path) }
12
+ end
13
+ end
14
+ end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative "base"
2
4
  require_relative "array_value"
3
5
  require_relative "array_of"
@@ -6,6 +8,7 @@ require_relative "string_value"
6
8
  require_relative "value_builder"
7
9
  require_relative "within"
8
10
  require_relative "expression"
11
+ require_relative "unescape"
9
12
 
10
13
  module Extract
11
14
  class ValueBuilder < Base
@@ -24,6 +27,9 @@ module Extract
24
27
 
25
28
  def value_for_hash
26
29
  props = node.props
30
+
31
+ Unescape.new(node, extractor).unescape! if props[:unescape]
32
+
27
33
  fixed_value = props[:fixed]
28
34
  return fixed_value if fixed_value
29
35
  return ArrayOf.new(node, extractor).value if props[:array_of]
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Extract
2
4
  class Within < Base
3
5
  def value
data/lib/src/extractor.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "cgi"
2
4
  require "active_support/core_ext/string"
3
5
  require_relative "format/formatter"
@@ -37,11 +39,11 @@ class PathBuilder < Struct.new(:base, :parent, :tag, keyword_init: true)
37
39
  end
38
40
 
39
41
  def matching_tags?(item, tag)
40
- item.gsub(/\[\d\]/, "") == tag
42
+ item.gsub(/\[\d+\]/, "") == tag
41
43
  end
42
44
  end
43
45
 
44
- class NodeParamsExtractor < Struct.new(:node)
46
+ class NodeParamsExtractor < Struct.new(:node)
45
47
  def extract
46
48
  [node.path, *node.props.values_at(:in_parent, :path, :link, :attr)]
47
49
  end
@@ -49,7 +51,7 @@ end
49
51
 
50
52
  class NodeExtractor
51
53
  def initialize(xml)
52
- @xml = Nokogiri::XML(remove_special_elements(xml), nil, Encoding::UTF_8.to_s)
54
+ @xml = Nokogiri::XML(xml)
53
55
  @xml.remove_namespaces!
54
56
  end
55
57
 
@@ -59,12 +61,18 @@ class NodeExtractor
59
61
  nil
60
62
  end
61
63
 
62
- private
64
+ def unescape!(path)
65
+ node = extract(path)
66
+ return if node.blank?
63
67
 
64
- def remove_special_elements(xml)
65
- CGI.unescapeHTML(xml).gsub(/<br>|<\/br>|&nbsp;/, { "&nbsp;" => " ", "<br>" => "", "</br>" => "" })
68
+ first_node = node.first
69
+ return if first_node.elements.present?
70
+
71
+ first_node.children = Nokogiri::XML.fragment(first_node.content).children
66
72
  end
67
73
 
74
+ private
75
+
68
76
  attr_reader :xml
69
77
  end
70
78
 
@@ -193,7 +201,11 @@ class Extractor
193
201
  end
194
202
 
195
203
  value = path_value(path, tag, attribute)
196
- format_value(value, node.props)
204
+ format_value(value, node.props)
205
+ end
206
+
207
+ def unescape!(path)
208
+ node_extractor.unescape!(path)
197
209
  end
198
210
 
199
211
  def format_value(value, props)
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative "mapper"
2
4
  require_relative "modifier"
3
5
 
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Format
2
4
  class Mapper
3
5
  def initialize(yml)
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Format
2
4
  class Modifier
3
5
  def initialize(yml, helper)
data/lib/src/node.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class Node < Struct.new(:props, :path)
2
4
  def initialize(*)
3
5
  super
@@ -1,13 +1,13 @@
1
1
  Gem::Specification.new do |spec|
2
2
  spec.name = "xml_data_extractor"
3
- spec.version = "0.3.0"
3
+ spec.version = "0.6.0"
4
4
  spec.authors = ["Fernando Almeida"]
5
5
  spec.email = ["fernandoprsbr@gmail.com"]
6
6
 
7
7
  spec.summary = "Provides a simples DSL for extracting data from XML documents"
8
8
  spec.homepage = "https://github.com/monde-sistemas/xml_data_extractor"
9
9
  spec.license = "MIT"
10
- spec.required_ruby_version = Gem::Requirement.new(">= 2.3.0")
10
+ spec.required_ruby_version = Gem::Requirement.new(">= 2.5")
11
11
 
12
12
  spec.metadata["homepage_uri"] = spec.homepage
13
13
  spec.metadata["source_code_uri"] = spec.homepage
@@ -24,5 +24,6 @@ Gem::Specification.new do |spec|
24
24
 
25
25
  spec.add_dependency "nokogiri", "~> 1.0"
26
26
  spec.add_dependency "activesupport", "~> 6.0"
27
+ spec.add_development_dependency "rake", "~> 12.0"
27
28
  spec.add_development_dependency "rspec", "~> 3.0"
28
29
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: xml_data_extractor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Fernando Almeida
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-10-30 00:00:00.000000000 Z
11
+ date: 2022-04-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -38,6 +38,20 @@ dependencies:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
40
  version: '6.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '12.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '12.0'
41
55
  - !ruby/object:Gem::Dependency
42
56
  name: rspec
43
57
  requirement: !ruby/object:Gem::Requirement
@@ -59,15 +73,20 @@ executables: []
59
73
  extensions: []
60
74
  extra_rdoc_files: []
61
75
  files:
76
+ - ".editorconfig"
77
+ - ".github/dependabot.yml"
78
+ - ".github/workflows/ci.yml"
62
79
  - ".gitignore"
80
+ - ".kodiak.toml"
63
81
  - ".rspec"
64
- - ".travis.yml"
82
+ - ".ruby-version"
65
83
  - Gemfile
66
84
  - Gemfile.lock
67
85
  - LICENSE.txt
68
86
  - README.md
69
87
  - Rakefile
70
88
  - bin/console
89
+ - bin/rspec
71
90
  - bin/setup
72
91
  - lib/src/extract/array_of.rb
73
92
  - lib/src/extract/array_value.rb
@@ -75,6 +94,7 @@ files:
75
94
  - lib/src/extract/expression.rb
76
95
  - lib/src/extract/hash_builder.rb
77
96
  - lib/src/extract/string_value.rb
97
+ - lib/src/extract/unescape.rb
78
98
  - lib/src/extract/value_builder.rb
79
99
  - lib/src/extract/within.rb
80
100
  - lib/src/extractor.rb
@@ -99,14 +119,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
99
119
  requirements:
100
120
  - - ">="
101
121
  - !ruby/object:Gem::Version
102
- version: 2.3.0
122
+ version: '2.5'
103
123
  required_rubygems_version: !ruby/object:Gem::Requirement
104
124
  requirements:
105
125
  - - ">="
106
126
  - !ruby/object:Gem::Version
107
127
  version: '0'
108
128
  requirements: []
109
- rubygems_version: 3.1.4
129
+ rubygems_version: 3.2.25
110
130
  signing_key:
111
131
  specification_version: 4
112
132
  summary: Provides a simples DSL for extracting data from XML documents
data/.travis.yml DELETED
@@ -1,6 +0,0 @@
1
- ---
2
- language: ruby
3
- cache: bundler
4
- rvm:
5
- - 2.6.6
6
- before_install: gem install bundler -v 2.1.4