text_extractor 0.3.0 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/text_extractor.rb +20 -1
- data/lib/text_extractor/extraction.rb +2 -2
- data/lib/text_extractor/version.rb +1 -1
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5412586591653945efffad2ce8783c9269e58582
|
4
|
+
data.tar.gz: dc9fdb2f1eaad7bfafab31ad13e3e5eff966ea22
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f8c4a71b402c49c136700eab64daeba10a48938285d1d06e1522bb8bbd47fe1cf5f6962f0d2dcc555fa413164185e50eed069a52ac6bc4b253b1dd973529d09b
|
7
|
+
data.tar.gz: 73117018ffc3542a71aa201ac84ddc5d038881605e3a3fdccabce1d0d65a77137f2b731cac6a4463c8e3902c431fa418e166bcefe0cd6feedb899616a75a9e29
|
data/lib/text_extractor.rb
CHANGED
@@ -16,6 +16,8 @@ class TextExtractor
|
|
16
16
|
@records = []
|
17
17
|
@filldowns = []
|
18
18
|
@current_record_values = []
|
19
|
+
@section_delimiter = nil
|
20
|
+
@section_terminator = nil
|
19
21
|
instance_exec(&block)
|
20
22
|
end
|
21
23
|
|
@@ -77,6 +79,11 @@ class TextExtractor
|
|
77
79
|
@records << klass.new(instance_exec(&block), **kwargs)
|
78
80
|
end
|
79
81
|
|
82
|
+
def section(delimiter, terminator = nil)
|
83
|
+
@section_delimiter = delimiter
|
84
|
+
@section_terminator = terminator
|
85
|
+
end
|
86
|
+
|
80
87
|
def filldown(**kwargs, &block)
|
81
88
|
raise "#{self.class}.filldown requires a block" unless block
|
82
89
|
record(Filldown, **kwargs, &block)
|
@@ -87,7 +94,19 @@ class TextExtractor
|
|
87
94
|
end
|
88
95
|
|
89
96
|
def scan(input)
|
90
|
-
|
97
|
+
prefill = {}
|
98
|
+
sections(input).flat_map { |section|
|
99
|
+
Extraction.new(section, self, prefill).scan.extraction_matches
|
100
|
+
}
|
101
|
+
end
|
102
|
+
|
103
|
+
def sections(input)
|
104
|
+
return [input] unless @section_delimiter
|
105
|
+
|
106
|
+
texts = input.split(@section_delimiter)
|
107
|
+
return texts unless @section_terminator
|
108
|
+
|
109
|
+
texts.map { |section| section + @section_terminator }
|
91
110
|
end
|
92
111
|
|
93
112
|
def regexps
|
data/lib/text_extractor/extraction.rb
CHANGED
@@ -3,16 +3,16 @@ class TextExtractor
|
|
3
3
|
class Extraction
|
4
4
|
attr_reader :input, :extractor, :re, :pos, :matches, :values
|
5
5
|
|
6
|
-
def initialize(input, extractor)
|
6
|
+
def initialize(input, extractor, fill = {})
|
7
7
|
@input = input
|
8
8
|
@extractor = extractor
|
9
|
+
@fill = fill
|
9
10
|
@pos = 0
|
10
11
|
@matches = []
|
11
12
|
@last_match = nil
|
12
13
|
end
|
13
14
|
|
14
15
|
def extraction_matches
|
15
|
-
@fill = {}
|
16
16
|
matches.flat_map do |match|
|
17
17
|
extraction_match(match)
|
18
18
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: text_extractor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ben Miller
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-11-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -88,8 +88,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
88
88
|
version: '0'
|
89
89
|
requirements: []
|
90
90
|
rubyforge_project:
|
91
|
-
rubygems_version: 2.5.
|
91
|
+
rubygems_version: 2.5.2
|
92
92
|
signing_key:
|
93
93
|
specification_version: 4
|
94
94
|
summary: Easily extract data from text
|
95
95
|
test_files: []
|
96
|
+
has_rdoc:
|