text_extractor 0.3.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c42725b840531e241a7353991f7fec182561a89c
4
- data.tar.gz: 07b7639b33d4d7b9380e9674c4a3f6531c3b03ff
3
+ metadata.gz: 5412586591653945efffad2ce8783c9269e58582
4
+ data.tar.gz: dc9fdb2f1eaad7bfafab31ad13e3e5eff966ea22
5
5
  SHA512:
6
- metadata.gz: 9cadfc1dee9915d1b0b259de9b006154486aaba50808246bff28af16630ad6f481e8fba20289d1f60b315f19d98df44320df2af5e33b58096faaa6596163fa54
7
- data.tar.gz: 9c3ee7b8a460612908a24bc64cc765aea243da2d2e9462b16630db1402fd6b7f75e2877be55341b968ac708d1b9b06c04b6424f50ecc5b5bafce373c7f5a049d
6
+ metadata.gz: f8c4a71b402c49c136700eab64daeba10a48938285d1d06e1522bb8bbd47fe1cf5f6962f0d2dcc555fa413164185e50eed069a52ac6bc4b253b1dd973529d09b
7
+ data.tar.gz: 73117018ffc3542a71aa201ac84ddc5d038881605e3a3fdccabce1d0d65a77137f2b731cac6a4463c8e3902c431fa418e166bcefe0cd6feedb899616a75a9e29
@@ -16,6 +16,8 @@ class TextExtractor
16
16
  @records = []
17
17
  @filldowns = []
18
18
  @current_record_values = []
19
+ @section_delimiter = nil
20
+ @section_terminator = nil
19
21
  instance_exec(&block)
20
22
  end
21
23
 
@@ -77,6 +79,11 @@ class TextExtractor
77
79
  @records << klass.new(instance_exec(&block), **kwargs)
78
80
  end
79
81
 
82
+ def section(delimiter, terminator = nil)
83
+ @section_delimiter = delimiter
84
+ @section_terminator = terminator
85
+ end
86
+
80
87
  def filldown(**kwargs, &block)
81
88
  raise "#{self.class}.filldown requires a block" unless block
82
89
  record(Filldown, **kwargs, &block)
@@ -87,7 +94,19 @@ class TextExtractor
87
94
  end
88
95
 
89
96
  def scan(input)
90
- Extraction.new(input, self).scan.extraction_matches
97
+ prefill = {}
98
+ sections(input).flat_map { |section|
99
+ Extraction.new(section, self, prefill).scan.extraction_matches
100
+ }
101
+ end
102
+
103
+ def sections(input)
104
+ return [input] unless @section_delimiter
105
+
106
+ texts = input.split(@section_delimiter)
107
+ return texts unless @section_terminator
108
+
109
+ texts.map { |section| section + @section_terminator }
91
110
  end
92
111
 
93
112
  def regexps
@@ -3,16 +3,16 @@ class TextExtractor
3
3
  class Extraction
4
4
  attr_reader :input, :extractor, :re, :pos, :matches, :values
5
5
 
6
- def initialize(input, extractor)
6
+ def initialize(input, extractor, fill = {})
7
7
  @input = input
8
8
  @extractor = extractor
9
+ @fill = fill
9
10
  @pos = 0
10
11
  @matches = []
11
12
  @last_match = nil
12
13
  end
13
14
 
14
15
  def extraction_matches
15
- @fill = {}
16
16
  matches.flat_map do |match|
17
17
  extraction_match(match)
18
18
  end
@@ -1,5 +1,5 @@
1
1
  class TextExtractor
2
2
  def self.version
3
- '0.3.0'
3
+ '0.4.0'
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: text_extractor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ben Miller
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-08-23 00:00:00.000000000 Z
11
+ date: 2016-11-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -88,8 +88,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
88
88
  version: '0'
89
89
  requirements: []
90
90
  rubyforge_project:
91
- rubygems_version: 2.5.1
91
+ rubygems_version: 2.5.2
92
92
  signing_key:
93
93
  specification_version: 4
94
94
  summary: Easily extract data from text
95
95
  test_files: []
96
+ has_rdoc: