text_extractor 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 78291099c84221f93ff6068099d736bc6703579b
4
- data.tar.gz: 8a65b9164e395200fac46207b6b313dfa8084a2b
3
+ metadata.gz: c42725b840531e241a7353991f7fec182561a89c
4
+ data.tar.gz: 07b7639b33d4d7b9380e9674c4a3f6531c3b03ff
5
5
  SHA512:
6
- metadata.gz: 7b18d39bde364c3417ccd18ff4ce8f072568e3e63b838b1bdce46b25cffd6b7356dbf2f4e9b1f6fd816368ce0aa194a1e6cc9f4ccd584f579a85eb795b264ddd
7
- data.tar.gz: a99721d0d2a48b5364dd301415334fdd85563664144d3d4a216c70d2c39ecd7654b64b9e49c78f582bb762c4dae8d33abc809b45807328f35b010fc3aff31482
6
+ metadata.gz: 9cadfc1dee9915d1b0b259de9b006154486aaba50808246bff28af16630ad6f481e8fba20289d1f60b315f19d98df44320df2af5e33b58096faaa6596163fa54
7
+ data.tar.gz: 9c3ee7b8a460612908a24bc64cc765aea243da2d2e9462b16630db1402fd6b7f75e2877be55341b968ac708d1b9b06c04b6424f50ecc5b5bafce373c7f5a049d
@@ -2,6 +2,7 @@ require_relative 'text_extractor/extraction'
2
2
  require_relative 'text_extractor/filldown'
3
3
  require_relative 'text_extractor/record'
4
4
  require_relative 'text_extractor/value'
5
+ require_relative 'text_extractor/inline_value'
5
6
 
6
7
  # represents an extractor definition
7
8
  class TextExtractor
@@ -41,6 +42,10 @@ class TextExtractor
41
42
  end
42
43
  end
43
44
 
45
+ def inline(id, &block)
46
+ @values[id] = InlineValue.new(id, &block)
47
+ end
48
+
44
49
  def boolean(id, re = Patterns::BOOLEAN)
45
50
  value(id, re) { |val| !val.match(Patterns::FALSE) }
46
51
  end
@@ -67,6 +72,7 @@ class TextExtractor
67
72
 
68
73
  def record(klass = Record, **kwargs, &block)
69
74
  raise "#{self.class}.record requires a block" unless block
75
+ kwargs[:extractor_values] = values
70
76
  kwargs[:values] = @current_record_values = []
71
77
  @records << klass.new(instance_exec(&block), **kwargs)
72
78
  end
@@ -25,6 +25,8 @@ class TextExtractor
25
25
  def initialize(original)
26
26
  @source = original.source
27
27
  @options = original.options
28
+ @output = nil
29
+ @directives = []
28
30
  end
29
31
 
30
32
  def expand
@@ -36,12 +38,17 @@ class TextExtractor
36
38
  @output = Regexp.new(@state.target.join(''), @options)
37
39
  end
38
40
 
41
+ def values
42
+ @directives.flat_map(&:values)
43
+ end
44
+
39
45
  private
40
46
 
41
47
  DIRECTIVE_MAP = {
42
48
  ' ' => { class: Comment },
43
49
  'any' => { class: Any },
44
50
  'begin' => { class: Begin, arguments: :parsed },
51
+ 'capture' => { class: Capture, arguments: :parsed },
45
52
  'end' => { class: End },
46
53
  'maybe' => { class: Maybe },
47
54
  'repeat' => { class: Repeat, arguments: :parse },
@@ -92,6 +99,7 @@ class TextExtractor
92
99
  return [Comment.new(@state)] if full_source.start_with?(' ')
93
100
  split_directives(full_source)
94
101
  .map { |source| parse_one_directive(source) }
102
+ .each { |directive| @directives << directive }
95
103
  end
96
104
 
97
105
  def parse_one_directive(source)
@@ -1,3 +1,5 @@
1
+ require 'text_extractor/inline_value'
2
+
1
3
  class TextExtractor
2
4
  class Directives
3
5
  # base class for line directives
@@ -9,6 +11,10 @@ class TextExtractor
9
11
  @argument = argument
10
12
  init if respond_to?(:init)
11
13
  end
14
+
15
+ def values
16
+ []
17
+ end
12
18
  end
13
19
 
14
20
  # open a line group
@@ -42,6 +48,17 @@ class TextExtractor
42
48
  end
43
49
  end
44
50
 
51
+ # capture group that creates a value
52
+ class Capture < Begin
53
+ def group(name, *args)
54
+ CaptureGroup.new(name, *args)
55
+ end
56
+
57
+ def values
58
+ [InlineValue.new(@argument.to_sym)]
59
+ end
60
+ end
61
+
45
62
  # text that will be omitted from the regexp
46
63
  class Comment < Directive
47
64
  def call
@@ -42,5 +42,17 @@ class TextExtractor
42
42
  ['(?:', *@lines.flat_map { |e| [e, '|'] }[0..-2], ')']
43
43
  end
44
44
  end
45
+
46
+ # a line group that will be captured to a value
47
+ class CaptureGroup < Group
48
+ def initialize(name, *args)
49
+ @name = name
50
+ @lines = args
51
+ end
52
+
53
+ def join
54
+ ["(?<#{@name}>", *@lines, ')']
55
+ end
56
+ end
45
57
  end
46
58
  end
@@ -0,0 +1,13 @@
1
+ require 'text_extractor/value'
2
+
3
+ class TextExtractor
4
+ # represents a value given by a .capture directive
5
+ class InlineValue < Value
6
+ def initialize(id, &block)
7
+ @id = id
8
+ @block = block
9
+ end
10
+
11
+ alias re id
12
+ end
13
+ end
@@ -5,12 +5,14 @@ class TextExtractor
5
5
  attr_reader :regexp, :factory, :values
6
6
 
7
7
  def initialize(regexp, factory: nil, values: [], fill: [], directives: true,
8
- strip: nil)
9
- @regexp = build_regexp(regexp, directives, strip)
8
+ inline: [], extractor_values: {}, strip: nil)
10
9
  @factory = factory
11
10
  @constructor = FactoryAnalyzer.new(factory).to_proc
11
+ @extractor_values = extractor_values
12
12
  @values = values.map { |val| [val.id, val] }.to_h
13
+ initialize_inline_values(inline)
13
14
  @default_values = values.map { |val| [val.id, nil] }.to_h
15
+ @regexp = build_regexp(regexp, directives, strip)
14
16
  @fill = Array(fill)
15
17
  end
16
18
 
@@ -55,7 +57,12 @@ class TextExtractor
55
57
 
56
58
  def expand_regexp(regexp, directives)
57
59
  if directives
58
- TextExtractor.expand_directives(regexp)
60
+ expander = Directives.new(regexp)
61
+ expanded = expander.expand
62
+ expander.values.each { |value|
63
+ values[value.id] = @extractor_values.fetch(value.id, value)
64
+ }
65
+ expanded
59
66
  else
60
67
  regexp
61
68
  end
@@ -96,9 +103,18 @@ class TextExtractor
96
103
  values.keys.map { |id| [id, values[id].convert(match[id])] }.to_h
97
104
  end
98
105
 
106
+ def initialize_inline_values(inline_values)
107
+ inline_values.each do |value|
108
+ @values[value] = @extractor_values
109
+ .fetch(value) { InlineValue.new(value) }
110
+ end
111
+ end
112
+
99
113
  # converts the value of the factory option to a constructor proc
100
114
  class FactoryAnalyzer
101
115
  def initialize(factory)
116
+ @params = nil
117
+
102
118
  case factory
103
119
  when Hash
104
120
  @klass, @params = factory.first
@@ -5,7 +5,7 @@ class TextExtractor
5
5
  def initialize(id, re, &block)
6
6
  @id = id
7
7
  @re = re
8
- @block = block if block_given?
8
+ @block = block
9
9
  end
10
10
 
11
11
  def convert(value)
@@ -1,5 +1,5 @@
1
1
  class TextExtractor
2
2
  def self.version
3
- '0.2.0'
3
+ '0.3.0'
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: text_extractor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ben Miller
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-07-13 00:00:00.000000000 Z
11
+ date: 2016-08-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -64,6 +64,7 @@ files:
64
64
  - lib/text_extractor/directives/group.rb
65
65
  - lib/text_extractor/extraction.rb
66
66
  - lib/text_extractor/filldown.rb
67
+ - lib/text_extractor/inline_value.rb
67
68
  - lib/text_extractor/record.rb
68
69
  - lib/text_extractor/value.rb
69
70
  - lib/text_extractor/version.rb
@@ -92,4 +93,3 @@ signing_key:
92
93
  specification_version: 4
93
94
  summary: Easily extract data from text
94
95
  test_files: []
95
- has_rdoc: