text_extractor 0.1.7 → 0.1.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e956aff10058c42b11f0cab0fd26f4218e6fcaa9
4
- data.tar.gz: 0f947396657eb89749366e3c84ccb08899e07d19
3
+ metadata.gz: 33c60b21e8e025e62025f06af668dd4859cde35c
4
+ data.tar.gz: 3c3b0c21fc91326431f95d323daedc84f580ed53
5
5
  SHA512:
6
- metadata.gz: e9b3e596e799952397dd8ae76c2deb52c84d356f468f1788f6c173cb9df4a77683ac279780900ed45dc49a729baba422288073ddac4c67349b93070bebd7140d
7
- data.tar.gz: ed8e0f2321f97843748e7d980b2e15e97fe651c2a80409f8daaa049786a426c3321eb77791e3ecb641d570c0e9dc7ae9673a2ea25086e7b513323597e4e519c4
6
+ metadata.gz: b6e4d910641926e734e9c0cc22d486e15ac695c549ca1eb09c1e78621caa49e6e535533becb83dcd3f2ad93b81954cfc8efa74ade21f4b618a6f56ffe74ed978
7
+ data.tar.gz: 2ecc4bfd0e9e123e46254dbe264d5a102593a48a1668f9a0d41f7d66b22266925b58b21cf57c138121f4ece8559be3151ca33de6eb5905f965def79cbae096c6
@@ -65,34 +65,10 @@ class TextExtractor
65
65
  value(id, re) { |val| IPAddr.new(val) }
66
66
  end
67
67
 
68
- def strip_record(regexp, strip: nil)
69
- lines = regexp.source.split("\n")
70
- prefix = lines.last
71
-
72
- if prefix =~ /\A\s*\z/
73
- lines.pop if lines.first =~ /\A\s*\z/
74
- lines.shift
75
- strip_record_by_line(lines, prefix, strip)
76
- end
77
-
78
- Regexp.new(lines.join("\n"), regexp.options)
79
- end
80
-
81
- def strip_record_by_line(lines, prefix, strip)
82
- lines.map! { |s| s.gsub(prefix.to_s, '') }
83
- case strip
84
- when :left then lines.map! { |s| "\[ \t\r\f]*#{s.lstrip}" }
85
- when :right then lines.map! { |s| "#{s.rstrip}\[ \t\r\f]*" }
86
- when :both then lines.map! { |s| "\[ \t\r\f]*#{s.strip}\[ \t\r\f]*" }
87
- end
88
- end
89
-
90
68
  def record(klass = Record, **kwargs, &block)
91
69
  raise "#{self.class}.record requires a block" unless block
92
- @current_record_values = []
93
- regexp = strip_record(instance_exec(&block), strip: kwargs.delete(:strip))
94
- kwargs[:values] = @current_record_values
95
- @records << klass.new(regexp, **kwargs)
70
+ kwargs[:values] = @current_record_values = []
71
+ @records << klass.new(instance_exec(&block), **kwargs)
96
72
  end
97
73
 
98
74
  def filldown(**kwargs, &block)
@@ -39,7 +39,7 @@ class TextExtractor
39
39
  private
40
40
 
41
41
  DIRECTIVE_MAP = {
42
- ' ' => { class: Comment, arguments: ->(source) { [source[1..-1]] } },
42
+ ' ' => { class: Comment },
43
43
  'any' => { class: Any },
44
44
  'begin' => { class: Begin, arguments: :parsed },
45
45
  'end' => { class: End },
@@ -95,9 +95,9 @@ class TextExtractor
95
95
  end
96
96
 
97
97
  def parse_one_directive(source)
98
- md = source.match(/^[a-z_]+/)
98
+ md = source.match(/^[a-z_]+/) || source.match(/^ /)
99
+ raise "Unknown directive(s) in #{@state.current_line}" unless md
99
100
  word = md[0]
100
- raise "Unknown directive(s) #{source}" unless md
101
101
  map = DIRECTIVE_MAP.fetch(word) { raise "Unknown directive #{word}" }
102
102
  args = parse_arguments(map[:arguments], md.post_match)
103
103
  map.fetch(:class).new(@state, *args)
@@ -73,7 +73,11 @@ class TextExtractor
73
73
  # skip to end of line
74
74
  class Rest < Directive
75
75
  def call
76
- state.current = [state.current, '[^\\n]*']
76
+ state.current = if state.newline?
77
+ [state.current.chomp, '[^\\n]*\n']
78
+ else
79
+ [state.current, '[^\\n]*']
80
+ end
77
81
  end
78
82
  end
79
83
  end
@@ -4,8 +4,9 @@ class TextExtractor
4
4
  class Record
5
5
  attr_reader :regexp, :factory, :values
6
6
 
7
- def initialize(regexp, factory: nil, values: [], fill: [], directives: true)
8
- @regexp = expand_regexp(regexp, directives)
7
+ def initialize(regexp, factory: nil, values: [], fill: [], directives: true,
8
+ strip: nil)
9
+ @regexp = build_regexp(regexp, directives, strip)
9
10
  @factory = factory
10
11
  @constructor = FactoryAnalyzer.new(factory).to_proc
11
12
  @values = values.map { |val| [val.id, val] }.to_h
@@ -25,6 +26,33 @@ class TextExtractor
25
26
  @constructor.call(extracted)
26
27
  end
27
28
 
29
+ def build_regexp(regexp, directives, strip)
30
+ stripped = strip_regexp(regexp, strip)
31
+ expanded = expand_regexp(stripped, directives)
32
+ ignore_regexp(expanded, strip)
33
+ end
34
+
35
+ def strip_regexp(regexp, strip)
36
+ lines = regexp.source.split("\n")
37
+ prefix = lines.last
38
+ if lines.first =~ /\A\s*\z/ && prefix =~ /\A\s*\z/
39
+ lines.shift
40
+ lines = lines.map { |s| s.gsub(prefix, '') }
41
+ lines = lines.map(&regexp_line_stripper(strip))
42
+ end
43
+ Regexp.new(lines.join("\n"), regexp.options)
44
+ end
45
+
46
+ def regexp_line_stripper(strip)
47
+ case strip
48
+ when :left then ->(s) { s.lstrip }
49
+ when :right then ->(s) { s.rstrip }
50
+ when :both then ->(s) { s.strip }
51
+ when nil, false then ->(s) { s }
52
+ else raise "Unknown strip option: #{strip}"
53
+ end
54
+ end
55
+
28
56
  def expand_regexp(regexp, directives)
29
57
  if directives
30
58
  TextExtractor.expand_directives(regexp)
@@ -33,6 +61,21 @@ class TextExtractor
33
61
  end
34
62
  end
35
63
 
64
+ def ignore_regexp(regexp, strip)
65
+ return regexp unless strip
66
+ lines = regexp.source.split("\n").map(&regexp_line_ignorer(strip))
67
+ Regexp.new(lines.join("\n"), regexp.options)
68
+ end
69
+
70
+ def regexp_line_ignorer(strip)
71
+ case strip
72
+ when :left then ->(s) { "\[ \\t\\r\\f]*#{s}" }
73
+ when :right then ->(s) { "#{s}\[ \\t\\r\\f]*" }
74
+ when :both then ->(s) { "\[ \\t\\r\\f]*#{s}\[ \\t\\r\\f]*" }
75
+ else raise "Unknown ignore whitespace option: #{strip}"
76
+ end
77
+ end
78
+
36
79
  def match(string, pos = 0)
37
80
  @regexp.match(string, pos)
38
81
  end
@@ -1,5 +1,5 @@
1
1
  class TextExtractor
2
2
  def self.version
3
- '0.1.7'
3
+ '0.1.8'
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: text_extractor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.7
4
+ version: 0.1.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ben Miller
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-03-22 00:00:00.000000000 Z
11
+ date: 2016-03-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler