text_extractor 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c42725b840531e241a7353991f7fec182561a89c
4
- data.tar.gz: 07b7639b33d4d7b9380e9674c4a3f6531c3b03ff
3
+ metadata.gz: 5412586591653945efffad2ce8783c9269e58582
4
+ data.tar.gz: dc9fdb2f1eaad7bfafab31ad13e3e5eff966ea22
5
5
  SHA512:
6
- metadata.gz: 9cadfc1dee9915d1b0b259de9b006154486aaba50808246bff28af16630ad6f481e8fba20289d1f60b315f19d98df44320df2af5e33b58096faaa6596163fa54
7
- data.tar.gz: 9c3ee7b8a460612908a24bc64cc765aea243da2d2e9462b16630db1402fd6b7f75e2877be55341b968ac708d1b9b06c04b6424f50ecc5b5bafce373c7f5a049d
6
+ metadata.gz: f8c4a71b402c49c136700eab64daeba10a48938285d1d06e1522bb8bbd47fe1cf5f6962f0d2dcc555fa413164185e50eed069a52ac6bc4b253b1dd973529d09b
7
+ data.tar.gz: 73117018ffc3542a71aa201ac84ddc5d038881605e3a3fdccabce1d0d65a77137f2b731cac6a4463c8e3902c431fa418e166bcefe0cd6feedb899616a75a9e29
@@ -16,6 +16,8 @@ class TextExtractor
16
16
  @records = []
17
17
  @filldowns = []
18
18
  @current_record_values = []
19
+ @section_delimiter = nil
20
+ @section_terminator = nil
19
21
  instance_exec(&block)
20
22
  end
21
23
 
@@ -77,6 +79,11 @@ class TextExtractor
77
79
  @records << klass.new(instance_exec(&block), **kwargs)
78
80
  end
79
81
 
82
+ def section(delimiter, terminator = nil)
83
+ @section_delimiter = delimiter
84
+ @section_terminator = terminator
85
+ end
86
+
80
87
  def filldown(**kwargs, &block)
81
88
  raise "#{self.class}.filldown requires a block" unless block
82
89
  record(Filldown, **kwargs, &block)
@@ -87,7 +94,19 @@ class TextExtractor
87
94
  end
88
95
 
89
96
  def scan(input)
90
- Extraction.new(input, self).scan.extraction_matches
97
+ prefill = {}
98
+ sections(input).flat_map { |section|
99
+ Extraction.new(section, self, prefill).scan.extraction_matches
100
+ }
101
+ end
102
+
103
+ def sections(input)
104
+ return [input] unless @section_delimiter
105
+
106
+ texts = input.split(@section_delimiter)
107
+ return texts unless @section_terminator
108
+
109
+ texts.map { |section| section + @section_terminator }
91
110
  end
92
111
 
93
112
  def regexps
@@ -3,16 +3,16 @@ class TextExtractor
3
3
  class Extraction
4
4
  attr_reader :input, :extractor, :re, :pos, :matches, :values
5
5
 
6
- def initialize(input, extractor)
6
+ def initialize(input, extractor, fill = {})
7
7
  @input = input
8
8
  @extractor = extractor
9
+ @fill = fill
9
10
  @pos = 0
10
11
  @matches = []
11
12
  @last_match = nil
12
13
  end
13
14
 
14
15
  def extraction_matches
15
- @fill = {}
16
16
  matches.flat_map do |match|
17
17
  extraction_match(match)
18
18
  end
@@ -1,5 +1,5 @@
1
1
  class TextExtractor
2
2
  def self.version
3
- '0.3.0'
3
+ '0.4.0'
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: text_extractor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ben Miller
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-08-23 00:00:00.000000000 Z
11
+ date: 2016-11-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -88,8 +88,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
88
88
  version: '0'
89
89
  requirements: []
90
90
  rubyforge_project:
91
- rubygems_version: 2.5.1
91
+ rubygems_version: 2.5.2
92
92
  signing_key:
93
93
  specification_version: 4
94
94
  summary: Easily extract data from text
95
95
  test_files: []
96
+ has_rdoc: