RubyGems - text_extractor - Versions diffs - 0.1.7 → 0.1.8 - Mend

text_extractor 0.1.7 → 0.1.8

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.

Files changed (7)

checksums.yaml +4 -4
data/lib/text_extractor.rb +2 -26
data/lib/text_extractor/directives.rb +3 -3
data/lib/text_extractor/directives/classes.rb +5 -1
data/lib/text_extractor/record.rb +45 -2
data/lib/text_extractor/version.rb +1 -1
metadata +2 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: e956aff10058c42b11f0cab0fd26f4218e6fcaa9
-  data.tar.gz: 0f947396657eb89749366e3c84ccb08899e07d19
+  metadata.gz: 33c60b21e8e025e62025f06af668dd4859cde35c
+  data.tar.gz: 3c3b0c21fc91326431f95d323daedc84f580ed53
 SHA512:
-  metadata.gz: e9b3e596e799952397dd8ae76c2deb52c84d356f468f1788f6c173cb9df4a77683ac279780900ed45dc49a729baba422288073ddac4c67349b93070bebd7140d
-  data.tar.gz: ed8e0f2321f97843748e7d980b2e15e97fe651c2a80409f8daaa049786a426c3321eb77791e3ecb641d570c0e9dc7ae9673a2ea25086e7b513323597e4e519c4
+  metadata.gz: b6e4d910641926e734e9c0cc22d486e15ac695c549ca1eb09c1e78621caa49e6e535533becb83dcd3f2ad93b81954cfc8efa74ade21f4b618a6f56ffe74ed978
+  data.tar.gz: 2ecc4bfd0e9e123e46254dbe264d5a102593a48a1668f9a0d41f7d66b22266925b58b21cf57c138121f4ece8559be3151ca33de6eb5905f965def79cbae096c6

data/lib/text_extractor.rb CHANGED Viewed

@@ -65,34 +65,10 @@ class TextExtractor
     value(id, re) { |val| IPAddr.new(val) }
   end
-  def strip_record(regexp, strip: nil)
-    lines = regexp.source.split("\n")
-    prefix = lines.last
-    if prefix =~ /\A\s*\z/
-      lines.pop if lines.first =~ /\A\s*\z/
-      lines.shift
-      strip_record_by_line(lines, prefix, strip)
-    end
-    Regexp.new(lines.join("\n"), regexp.options)
-  end
-  def strip_record_by_line(lines, prefix, strip)
-    lines.map! { |s| s.gsub(prefix.to_s, '') }
-    case strip
-    when :left  then lines.map! { |s| "\[ \t\r\f]*#{s.lstrip}" }
-    when :right then lines.map! { |s| "#{s.rstrip}\[ \t\r\f]*" }
-    when :both  then lines.map! { |s| "\[ \t\r\f]*#{s.strip}\[ \t\r\f]*" }
-    end
-  end
   def record(klass = Record, **kwargs, &block)
     raise "#{self.class}.record requires a block" unless block
-    @current_record_values = []
-    regexp = strip_record(instance_exec(&block), strip: kwargs.delete(:strip))
-    kwargs[:values] = @current_record_values
-    @records << klass.new(regexp, **kwargs)
+    kwargs[:values] = @current_record_values = []
+    @records << klass.new(instance_exec(&block), **kwargs)
   end
   def filldown(**kwargs, &block)

data/lib/text_extractor/directives.rb CHANGED Viewed

@@ -39,7 +39,7 @@ class TextExtractor
     private
     DIRECTIVE_MAP = {
-      ' '      => { class: Comment, arguments: ->(source) { [source[1..-1]] } },
+      ' '      => { class: Comment },
       'any'    => { class: Any },
       'begin'  => { class: Begin, arguments: :parsed },
       'end'    => { class: End },
@@ -95,9 +95,9 @@ class TextExtractor
     end
     def parse_one_directive(source)
-      md = source.match(/^[a-z_]+/)
+      md = source.match(/^[a-z_]+/) || source.match(/^ /)
+      raise "Unknown directive(s) in #{@state.current_line}" unless md
       word = md[0]
-      raise "Unknown directive(s) #{source}" unless md
       map = DIRECTIVE_MAP.fetch(word) { raise "Unknown directive #{word}" }
       args = parse_arguments(map[:arguments], md.post_match)
       map.fetch(:class).new(@state, *args)

data/lib/text_extractor/directives/classes.rb CHANGED Viewed

@@ -73,7 +73,11 @@ class TextExtractor
     # skip to end of line
     class Rest < Directive
       def call
-        state.current = [state.current, '[^\\n]*']
+        state.current = if state.newline?
+                          [state.current.chomp, '[^\\n]*\n']
+                        else
+                          [state.current, '[^\\n]*']
+                        end
       end
     end
   end

data/lib/text_extractor/record.rb CHANGED Viewed

@@ -4,8 +4,9 @@ class TextExtractor
   class Record
     attr_reader :regexp, :factory, :values
-    def initialize(regexp, factory: nil, values: [], fill: [], directives: true)
-      @regexp = expand_regexp(regexp, directives)
+    def initialize(regexp, factory: nil, values: [], fill: [], directives: true,
+                   strip: nil)
+      @regexp = build_regexp(regexp, directives, strip)
       @factory = factory
       @constructor = FactoryAnalyzer.new(factory).to_proc
       @values = values.map { |val| [val.id, val] }.to_h
@@ -25,6 +26,33 @@ class TextExtractor
       @constructor.call(extracted)
     end
+    def build_regexp(regexp, directives, strip)
+      stripped = strip_regexp(regexp, strip)
+      expanded = expand_regexp(stripped, directives)
+      ignore_regexp(expanded, strip)
+    end
+    def strip_regexp(regexp, strip)
+      lines = regexp.source.split("\n")
+      prefix = lines.last
+      if lines.first =~ /\A\s*\z/ && prefix =~ /\A\s*\z/
+        lines.shift
+        lines = lines.map { |s| s.gsub(prefix, '') }
+        lines = lines.map(&regexp_line_stripper(strip))
+      end
+      Regexp.new(lines.join("\n"), regexp.options)
+    end
+    def regexp_line_stripper(strip)
+      case strip
+      when :left  then ->(s) { s.lstrip }
+      when :right then ->(s) { s.rstrip }
+      when :both  then ->(s) { s.strip }
+      when nil, false then ->(s) { s }
+      else raise "Unknown strip option: #{strip}"
+      end
+    end
     def expand_regexp(regexp, directives)
       if directives
         TextExtractor.expand_directives(regexp)
@@ -33,6 +61,21 @@ class TextExtractor
       end
     end
+    def ignore_regexp(regexp, strip)
+      return regexp unless strip
+      lines = regexp.source.split("\n").map(&regexp_line_ignorer(strip))
+      Regexp.new(lines.join("\n"), regexp.options)
+    end
+    def regexp_line_ignorer(strip)
+      case strip
+      when :left  then ->(s) { "\[ \\t\\r\\f]*#{s}" }
+      when :right then ->(s) { "#{s}\[ \\t\\r\\f]*" }
+      when :both  then ->(s) { "\[ \\t\\r\\f]*#{s}\[ \\t\\r\\f]*" }
+      else raise "Unknown ignore whitespace option: #{strip}"
+      end
+    end
     def match(string, pos = 0)
       @regexp.match(string, pos)
     end

data/lib/text_extractor/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 class TextExtractor
   def self.version
-    '0.1.7'
+    '0.1.8'
   end
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: text_extractor
 version: !ruby/object:Gem::Version
-  version: 0.1.7
+  version: 0.1.8
 platform: ruby
 authors:
 - Ben Miller
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2016-03-22 00:00:00.000000000 Z
+date: 2016-03-23 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler