text_extractor 0.2.0 → 0.3.0

Sign up to get free protection for your applications and access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 78291099c84221f93ff6068099d736bc6703579b
4
- data.tar.gz: 8a65b9164e395200fac46207b6b313dfa8084a2b
3
+ metadata.gz: c42725b840531e241a7353991f7fec182561a89c
4
+ data.tar.gz: 07b7639b33d4d7b9380e9674c4a3f6531c3b03ff
5
5
  SHA512:
6
- metadata.gz: 7b18d39bde364c3417ccd18ff4ce8f072568e3e63b838b1bdce46b25cffd6b7356dbf2f4e9b1f6fd816368ce0aa194a1e6cc9f4ccd584f579a85eb795b264ddd
7
- data.tar.gz: a99721d0d2a48b5364dd301415334fdd85563664144d3d4a216c70d2c39ecd7654b64b9e49c78f582bb762c4dae8d33abc809b45807328f35b010fc3aff31482
6
+ metadata.gz: 9cadfc1dee9915d1b0b259de9b006154486aaba50808246bff28af16630ad6f481e8fba20289d1f60b315f19d98df44320df2af5e33b58096faaa6596163fa54
7
+ data.tar.gz: 9c3ee7b8a460612908a24bc64cc765aea243da2d2e9462b16630db1402fd6b7f75e2877be55341b968ac708d1b9b06c04b6424f50ecc5b5bafce373c7f5a049d
@@ -2,6 +2,7 @@ require_relative 'text_extractor/extraction'
2
2
  require_relative 'text_extractor/filldown'
3
3
  require_relative 'text_extractor/record'
4
4
  require_relative 'text_extractor/value'
5
+ require_relative 'text_extractor/inline_value'
5
6
 
6
7
  # represents an extractor definition
7
8
  class TextExtractor
@@ -41,6 +42,10 @@ class TextExtractor
41
42
  end
42
43
  end
43
44
 
45
+ def inline(id, &block)
46
+ @values[id] = InlineValue.new(id, &block)
47
+ end
48
+
44
49
  def boolean(id, re = Patterns::BOOLEAN)
45
50
  value(id, re) { |val| !val.match(Patterns::FALSE) }
46
51
  end
@@ -67,6 +72,7 @@ class TextExtractor
67
72
 
68
73
  def record(klass = Record, **kwargs, &block)
69
74
  raise "#{self.class}.record requires a block" unless block
75
+ kwargs[:extractor_values] = values
70
76
  kwargs[:values] = @current_record_values = []
71
77
  @records << klass.new(instance_exec(&block), **kwargs)
72
78
  end
@@ -25,6 +25,8 @@ class TextExtractor
25
25
  def initialize(original)
26
26
  @source = original.source
27
27
  @options = original.options
28
+ @output = nil
29
+ @directives = []
28
30
  end
29
31
 
30
32
  def expand
@@ -36,12 +38,17 @@ class TextExtractor
36
38
  @output = Regexp.new(@state.target.join(''), @options)
37
39
  end
38
40
 
41
+ def values
42
+ @directives.flat_map(&:values)
43
+ end
44
+
39
45
  private
40
46
 
41
47
  DIRECTIVE_MAP = {
42
48
  ' ' => { class: Comment },
43
49
  'any' => { class: Any },
44
50
  'begin' => { class: Begin, arguments: :parsed },
51
+ 'capture' => { class: Capture, arguments: :parsed },
45
52
  'end' => { class: End },
46
53
  'maybe' => { class: Maybe },
47
54
  'repeat' => { class: Repeat, arguments: :parse },
@@ -92,6 +99,7 @@ class TextExtractor
92
99
  return [Comment.new(@state)] if full_source.start_with?(' ')
93
100
  split_directives(full_source)
94
101
  .map { |source| parse_one_directive(source) }
102
+ .each { |directive| @directives << directive }
95
103
  end
96
104
 
97
105
  def parse_one_directive(source)
@@ -1,3 +1,5 @@
1
+ require 'text_extractor/inline_value'
2
+
1
3
  class TextExtractor
2
4
  class Directives
3
5
  # base class for line directives
@@ -9,6 +11,10 @@ class TextExtractor
9
11
  @argument = argument
10
12
  init if respond_to?(:init)
11
13
  end
14
+
15
+ def values
16
+ []
17
+ end
12
18
  end
13
19
 
14
20
  # open a line group
@@ -42,6 +48,17 @@ class TextExtractor
42
48
  end
43
49
  end
44
50
 
51
+ # capture group that creates a value
52
+ class Capture < Begin
53
+ def group(name, *args)
54
+ CaptureGroup.new(name, *args)
55
+ end
56
+
57
+ def values
58
+ [InlineValue.new(@argument.to_sym)]
59
+ end
60
+ end
61
+
45
62
  # text that will be omitted from the regexp
46
63
  class Comment < Directive
47
64
  def call
@@ -42,5 +42,17 @@ class TextExtractor
42
42
  ['(?:', *@lines.flat_map { |e| [e, '|'] }[0..-2], ')']
43
43
  end
44
44
  end
45
+
46
+ # a line group that will be captured to a value
47
+ class CaptureGroup < Group
48
+ def initialize(name, *args)
49
+ @name = name
50
+ @lines = args
51
+ end
52
+
53
+ def join
54
+ ["(?<#{@name}>", *@lines, ')']
55
+ end
56
+ end
45
57
  end
46
58
  end
@@ -0,0 +1,13 @@
1
+ require 'text_extractor/value'
2
+
3
+ class TextExtractor
4
+ # represents a value given by a .capture directive
5
+ class InlineValue < Value
6
+ def initialize(id, &block)
7
+ @id = id
8
+ @block = block
9
+ end
10
+
11
+ alias re id
12
+ end
13
+ end
@@ -5,12 +5,14 @@ class TextExtractor
5
5
  attr_reader :regexp, :factory, :values
6
6
 
7
7
  def initialize(regexp, factory: nil, values: [], fill: [], directives: true,
8
- strip: nil)
9
- @regexp = build_regexp(regexp, directives, strip)
8
+ inline: [], extractor_values: {}, strip: nil)
10
9
  @factory = factory
11
10
  @constructor = FactoryAnalyzer.new(factory).to_proc
11
+ @extractor_values = extractor_values
12
12
  @values = values.map { |val| [val.id, val] }.to_h
13
+ initialize_inline_values(inline)
13
14
  @default_values = values.map { |val| [val.id, nil] }.to_h
15
+ @regexp = build_regexp(regexp, directives, strip)
14
16
  @fill = Array(fill)
15
17
  end
16
18
 
@@ -55,7 +57,12 @@ class TextExtractor
55
57
 
56
58
  def expand_regexp(regexp, directives)
57
59
  if directives
58
- TextExtractor.expand_directives(regexp)
60
+ expander = Directives.new(regexp)
61
+ expanded = expander.expand
62
+ expander.values.each { |value|
63
+ values[value.id] = @extractor_values.fetch(value.id, value)
64
+ }
65
+ expanded
59
66
  else
60
67
  regexp
61
68
  end
@@ -96,9 +103,18 @@ class TextExtractor
96
103
  values.keys.map { |id| [id, values[id].convert(match[id])] }.to_h
97
104
  end
98
105
 
106
+ def initialize_inline_values(inline_values)
107
+ inline_values.each do |value|
108
+ @values[value] = @extractor_values
109
+ .fetch(value) { InlineValue.new(value) }
110
+ end
111
+ end
112
+
99
113
  # converts the value of the factory option to a constructor proc
100
114
  class FactoryAnalyzer
101
115
  def initialize(factory)
116
+ @params = nil
117
+
102
118
  case factory
103
119
  when Hash
104
120
  @klass, @params = factory.first
@@ -5,7 +5,7 @@ class TextExtractor
5
5
  def initialize(id, re, &block)
6
6
  @id = id
7
7
  @re = re
8
- @block = block if block_given?
8
+ @block = block
9
9
  end
10
10
 
11
11
  def convert(value)
@@ -1,5 +1,5 @@
1
1
  class TextExtractor
2
2
  def self.version
3
- '0.2.0'
3
+ '0.3.0'
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: text_extractor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ben Miller
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-07-13 00:00:00.000000000 Z
11
+ date: 2016-08-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -64,6 +64,7 @@ files:
64
64
  - lib/text_extractor/directives/group.rb
65
65
  - lib/text_extractor/extraction.rb
66
66
  - lib/text_extractor/filldown.rb
67
+ - lib/text_extractor/inline_value.rb
67
68
  - lib/text_extractor/record.rb
68
69
  - lib/text_extractor/value.rb
69
70
  - lib/text_extractor/version.rb
@@ -92,4 +93,3 @@ signing_key:
92
93
  specification_version: 4
93
94
  summary: Easily extract data from text
94
95
  test_files: []
95
- has_rdoc: