text_extractor 0.1.7 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e956aff10058c42b11f0cab0fd26f4218e6fcaa9
4
- data.tar.gz: 0f947396657eb89749366e3c84ccb08899e07d19
3
+ metadata.gz: 33c60b21e8e025e62025f06af668dd4859cde35c
4
+ data.tar.gz: 3c3b0c21fc91326431f95d323daedc84f580ed53
5
5
  SHA512:
6
- metadata.gz: e9b3e596e799952397dd8ae76c2deb52c84d356f468f1788f6c173cb9df4a77683ac279780900ed45dc49a729baba422288073ddac4c67349b93070bebd7140d
7
- data.tar.gz: ed8e0f2321f97843748e7d980b2e15e97fe651c2a80409f8daaa049786a426c3321eb77791e3ecb641d570c0e9dc7ae9673a2ea25086e7b513323597e4e519c4
6
+ metadata.gz: b6e4d910641926e734e9c0cc22d486e15ac695c549ca1eb09c1e78621caa49e6e535533becb83dcd3f2ad93b81954cfc8efa74ade21f4b618a6f56ffe74ed978
7
+ data.tar.gz: 2ecc4bfd0e9e123e46254dbe264d5a102593a48a1668f9a0d41f7d66b22266925b58b21cf57c138121f4ece8559be3151ca33de6eb5905f965def79cbae096c6
@@ -65,34 +65,10 @@ class TextExtractor
65
65
  value(id, re) { |val| IPAddr.new(val) }
66
66
  end
67
67
 
68
- def strip_record(regexp, strip: nil)
69
- lines = regexp.source.split("\n")
70
- prefix = lines.last
71
-
72
- if prefix =~ /\A\s*\z/
73
- lines.pop if lines.first =~ /\A\s*\z/
74
- lines.shift
75
- strip_record_by_line(lines, prefix, strip)
76
- end
77
-
78
- Regexp.new(lines.join("\n"), regexp.options)
79
- end
80
-
81
- def strip_record_by_line(lines, prefix, strip)
82
- lines.map! { |s| s.gsub(prefix.to_s, '') }
83
- case strip
84
- when :left then lines.map! { |s| "\[ \t\r\f]*#{s.lstrip}" }
85
- when :right then lines.map! { |s| "#{s.rstrip}\[ \t\r\f]*" }
86
- when :both then lines.map! { |s| "\[ \t\r\f]*#{s.strip}\[ \t\r\f]*" }
87
- end
88
- end
89
-
90
68
  def record(klass = Record, **kwargs, &block)
91
69
  raise "#{self.class}.record requires a block" unless block
92
- @current_record_values = []
93
- regexp = strip_record(instance_exec(&block), strip: kwargs.delete(:strip))
94
- kwargs[:values] = @current_record_values
95
- @records << klass.new(regexp, **kwargs)
70
+ kwargs[:values] = @current_record_values = []
71
+ @records << klass.new(instance_exec(&block), **kwargs)
96
72
  end
97
73
 
98
74
  def filldown(**kwargs, &block)
@@ -39,7 +39,7 @@ class TextExtractor
39
39
  private
40
40
 
41
41
  DIRECTIVE_MAP = {
42
- ' ' => { class: Comment, arguments: ->(source) { [source[1..-1]] } },
42
+ ' ' => { class: Comment },
43
43
  'any' => { class: Any },
44
44
  'begin' => { class: Begin, arguments: :parsed },
45
45
  'end' => { class: End },
@@ -95,9 +95,9 @@ class TextExtractor
95
95
  end
96
96
 
97
97
  def parse_one_directive(source)
98
- md = source.match(/^[a-z_]+/)
98
+ md = source.match(/^[a-z_]+/) || source.match(/^ /)
99
+ raise "Unknown directive(s) in #{@state.current_line}" unless md
99
100
  word = md[0]
100
- raise "Unknown directive(s) #{source}" unless md
101
101
  map = DIRECTIVE_MAP.fetch(word) { raise "Unknown directive #{word}" }
102
102
  args = parse_arguments(map[:arguments], md.post_match)
103
103
  map.fetch(:class).new(@state, *args)
@@ -73,7 +73,11 @@ class TextExtractor
73
73
  # skip to end of line
74
74
  class Rest < Directive
75
75
  def call
76
- state.current = [state.current, '[^\\n]*']
76
+ state.current = if state.newline?
77
+ [state.current.chomp, '[^\\n]*\n']
78
+ else
79
+ [state.current, '[^\\n]*']
80
+ end
77
81
  end
78
82
  end
79
83
  end
@@ -4,8 +4,9 @@ class TextExtractor
4
4
  class Record
5
5
  attr_reader :regexp, :factory, :values
6
6
 
7
- def initialize(regexp, factory: nil, values: [], fill: [], directives: true)
8
- @regexp = expand_regexp(regexp, directives)
7
+ def initialize(regexp, factory: nil, values: [], fill: [], directives: true,
8
+ strip: nil)
9
+ @regexp = build_regexp(regexp, directives, strip)
9
10
  @factory = factory
10
11
  @constructor = FactoryAnalyzer.new(factory).to_proc
11
12
  @values = values.map { |val| [val.id, val] }.to_h
@@ -25,6 +26,33 @@ class TextExtractor
25
26
  @constructor.call(extracted)
26
27
  end
27
28
 
29
+ def build_regexp(regexp, directives, strip)
30
+ stripped = strip_regexp(regexp, strip)
31
+ expanded = expand_regexp(stripped, directives)
32
+ ignore_regexp(expanded, strip)
33
+ end
34
+
35
+ def strip_regexp(regexp, strip)
36
+ lines = regexp.source.split("\n")
37
+ prefix = lines.last
38
+ if lines.first =~ /\A\s*\z/ && prefix =~ /\A\s*\z/
39
+ lines.shift
40
+ lines = lines.map { |s| s.gsub(prefix, '') }
41
+ lines = lines.map(&regexp_line_stripper(strip))
42
+ end
43
+ Regexp.new(lines.join("\n"), regexp.options)
44
+ end
45
+
46
+ def regexp_line_stripper(strip)
47
+ case strip
48
+ when :left then ->(s) { s.lstrip }
49
+ when :right then ->(s) { s.rstrip }
50
+ when :both then ->(s) { s.strip }
51
+ when nil, false then ->(s) { s }
52
+ else raise "Unknown strip option: #{strip}"
53
+ end
54
+ end
55
+
28
56
  def expand_regexp(regexp, directives)
29
57
  if directives
30
58
  TextExtractor.expand_directives(regexp)
@@ -33,6 +61,21 @@ class TextExtractor
33
61
  end
34
62
  end
35
63
 
64
+ def ignore_regexp(regexp, strip)
65
+ return regexp unless strip
66
+ lines = regexp.source.split("\n").map(&regexp_line_ignorer(strip))
67
+ Regexp.new(lines.join("\n"), regexp.options)
68
+ end
69
+
70
+ def regexp_line_ignorer(strip)
71
+ case strip
72
+ when :left then ->(s) { "\[ \\t\\r\\f]*#{s}" }
73
+ when :right then ->(s) { "#{s}\[ \\t\\r\\f]*" }
74
+ when :both then ->(s) { "\[ \\t\\r\\f]*#{s}\[ \\t\\r\\f]*" }
75
+ else raise "Unknown ignore whitespace option: #{strip}"
76
+ end
77
+ end
78
+
36
79
  def match(string, pos = 0)
37
80
  @regexp.match(string, pos)
38
81
  end
@@ -1,5 +1,5 @@
1
1
  class TextExtractor
2
2
  def self.version
3
- '0.1.7'
3
+ '0.1.8'
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: text_extractor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.7
4
+ version: 0.1.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ben Miller
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-03-22 00:00:00.000000000 Z
11
+ date: 2016-03-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler