text_extractor 0.0.2 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/text_extractor.rb +9 -9
- data/lib/text_extractor/filldown.rb +1 -1
- data/lib/text_extractor/record.rb +15 -3
- data/lib/text_extractor/version.rb +1 -1
- metadata +56 -15
    
        checksums.yaml
    ADDED
    
    | @@ -0,0 +1,7 @@ | |
| 1 | 
            +
            ---
         | 
| 2 | 
            +
            SHA1:
         | 
| 3 | 
            +
              metadata.gz: 80ea0027acddf0b1b3cd96a78737c6e990159a71
         | 
| 4 | 
            +
              data.tar.gz: 4938b707b639b5efde6013e2c06f1c04a57567a6
         | 
| 5 | 
            +
            SHA512:
         | 
| 6 | 
            +
              metadata.gz: a9d49fa0d38752dbe50969db22489740000ecce854c4e1105e8dd6be0b6a24328f799c6d3594e9a7a3fcab3c7d6f96512ca233030b20bf35461227bbd4351efd
         | 
| 7 | 
            +
              data.tar.gz: 1281833781015336ae886392e07d884ed82787cdba4a5d2eb57f2f1740c5df40ab95d3de88ed38be73034879025c9be2998f62119479fbee86e41a59aeb5960a
         | 
    
        data/lib/text_extractor.rb
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 | 
            -
            require_relative  | 
| 2 | 
            -
            require_relative  | 
| 3 | 
            -
            require_relative  | 
| 4 | 
            -
            require_relative  | 
| 1 | 
            +
            require_relative 'text_extractor/extraction'
         | 
| 2 | 
            +
            require_relative 'text_extractor/filldown'
         | 
| 3 | 
            +
            require_relative 'text_extractor/record'
         | 
| 4 | 
            +
            require_relative 'text_extractor/value'
         | 
| 5 5 |  | 
| 6 6 | 
             
            # represents an extractor definition
         | 
| 7 7 | 
             
            class TextExtractor
         | 
| 8 8 | 
             
              attr_reader :records, :values
         | 
| 9 9 |  | 
| 10 10 | 
             
              def initialize(&block)
         | 
| 11 | 
            -
                 | 
| 11 | 
            +
                raise "#{self.class}.new requires a block" unless block
         | 
| 12 12 | 
             
                @values = {}
         | 
| 13 13 | 
             
                @fill = {}
         | 
| 14 14 | 
             
                @values = {}
         | 
| @@ -21,7 +21,7 @@ class TextExtractor | |
| 21 21 | 
             
              module Patterns
         | 
| 22 22 | 
             
                INTEGER = /\d+/
         | 
| 23 23 | 
             
                FLOAT = /\d+\.?|\d*\.\d+/
         | 
| 24 | 
            -
                RATIONAL = %r | 
| 24 | 
            +
                RATIONAL = %r{\d+/\d+}
         | 
| 25 25 | 
             
                IPV4 = /[0-9.]{7,15}/
         | 
| 26 26 | 
             
                IPV6 = /[:a-fA-F0-9\.]{2,45}/
         | 
| 27 27 | 
             
                IPADDR = Regexp.union(IPV4, IPV6)
         | 
| @@ -68,12 +68,12 @@ class TextExtractor | |
| 68 68 | 
             
              def strip_record(regexp)
         | 
| 69 69 | 
             
                lines = regexp.source.lines
         | 
| 70 70 | 
             
                prefix = lines.last
         | 
| 71 | 
            -
                lines.map! { |s| s.gsub( | 
| 71 | 
            +
                lines.map! { |s| s.gsub(prefix.to_s, '') } if prefix =~ /\A\s*\z/
         | 
| 72 72 | 
             
                Regexp.new(lines.join.strip, regexp.options)
         | 
| 73 73 | 
             
              end
         | 
| 74 74 |  | 
| 75 75 | 
             
              def record(klass = Record, **kwargs, &block)
         | 
| 76 | 
            -
                 | 
| 76 | 
            +
                raise "#{self.class}.record requires a block" unless block
         | 
| 77 77 | 
             
                @current_record_values = []
         | 
| 78 78 | 
             
                regexp = strip_record(instance_exec(&block))
         | 
| 79 79 | 
             
                kwargs[:values] = @current_record_values
         | 
| @@ -81,7 +81,7 @@ class TextExtractor | |
| 81 81 | 
             
              end
         | 
| 82 82 |  | 
| 83 83 | 
             
              def filldown(**kwargs, &block)
         | 
| 84 | 
            -
                 | 
| 84 | 
            +
                raise "#{self.class}.filldown requires a block" unless block
         | 
| 85 85 | 
             
                record(Filldown, **kwargs, &block)
         | 
| 86 86 | 
             
              end
         | 
| 87 87 |  | 
    
        data/lib/text_extractor/record.rb
    CHANGED
    
    | @@ -12,9 +12,21 @@ class TextExtractor | |
| 12 12 |  | 
| 13 13 | 
             
                def extraction(match, fill)
         | 
| 14 14 | 
             
                  extracted = {}.merge!(@default_values)
         | 
| 15 | 
            -
             | 
| 16 | 
            -
             | 
| 17 | 
            -
                   | 
| 15 | 
            +
                                .merge!(extract_fills fill)
         | 
| 16 | 
            +
                                .merge!(extract_values match)
         | 
| 17 | 
            +
                  build_extraction(extracted)
         | 
| 18 | 
            +
                end
         | 
| 19 | 
            +
             | 
| 20 | 
            +
                def build_extraction(extracted)
         | 
| 21 | 
            +
                  case factory
         | 
| 22 | 
            +
                  when Hash
         | 
| 23 | 
            +
                    klass, params = factory.first
         | 
| 24 | 
            +
                    klass.new(*extracted.values_at(*params))
         | 
| 25 | 
            +
                  when Class
         | 
| 26 | 
            +
                    factory.new(*extracted.values)
         | 
| 27 | 
            +
                  else
         | 
| 28 | 
            +
                    extracted
         | 
| 29 | 
            +
                  end
         | 
| 18 30 | 
             
                end
         | 
| 19 31 |  | 
| 20 32 | 
             
                def match(string, pos = 0)
         | 
    
        metadata
    CHANGED
    
    | @@ -1,16 +1,57 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: text_extractor
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.0 | 
| 5 | 
            -
              prerelease: 
         | 
| 4 | 
            +
              version: 0.1.0
         | 
| 6 5 | 
             
            platform: ruby
         | 
| 7 6 | 
             
            authors:
         | 
| 8 7 | 
             
            - Ben Miller
         | 
| 9 8 | 
             
            autorequire: 
         | 
| 10 9 | 
             
            bindir: bin
         | 
| 11 10 | 
             
            cert_chain: []
         | 
| 12 | 
            -
            date:  | 
| 13 | 
            -
            dependencies: | 
| 11 | 
            +
            date: 2016-03-11 00:00:00.000000000 Z
         | 
| 12 | 
            +
            dependencies:
         | 
| 13 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 14 | 
            +
              name: bundler
         | 
| 15 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 16 | 
            +
                requirements:
         | 
| 17 | 
            +
                - - "~>"
         | 
| 18 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 19 | 
            +
                    version: '1.0'
         | 
| 20 | 
            +
              type: :development
         | 
| 21 | 
            +
              prerelease: false
         | 
| 22 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 23 | 
            +
                requirements:
         | 
| 24 | 
            +
                - - "~>"
         | 
| 25 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 26 | 
            +
                    version: '1.0'
         | 
| 27 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 28 | 
            +
              name: rake
         | 
| 29 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 30 | 
            +
                requirements:
         | 
| 31 | 
            +
                - - "~>"
         | 
| 32 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 33 | 
            +
                    version: '10.0'
         | 
| 34 | 
            +
              type: :development
         | 
| 35 | 
            +
              prerelease: false
         | 
| 36 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 37 | 
            +
                requirements:
         | 
| 38 | 
            +
                - - "~>"
         | 
| 39 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 40 | 
            +
                    version: '10.0'
         | 
| 41 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 42 | 
            +
              name: minitest
         | 
| 43 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 44 | 
            +
                requirements:
         | 
| 45 | 
            +
                - - "~>"
         | 
| 46 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 47 | 
            +
                    version: '5.0'
         | 
| 48 | 
            +
              type: :development
         | 
| 49 | 
            +
              prerelease: false
         | 
| 50 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 51 | 
            +
                requirements:
         | 
| 52 | 
            +
                - - "~>"
         | 
| 53 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 54 | 
            +
                    version: '5.0'
         | 
| 14 55 | 
             
            description: 
         | 
| 15 56 | 
             
            email: bjmllr@gmail.com
         | 
| 16 57 | 
             
            executables: []
         | 
| @@ -18,34 +59,34 @@ extensions: [] | |
| 18 59 | 
             
            extra_rdoc_files: []
         | 
| 19 60 | 
             
            files:
         | 
| 20 61 | 
             
            - lib/text_extractor.rb
         | 
| 21 | 
            -
            - lib/text_extractor/version.rb
         | 
| 22 62 | 
             
            - lib/text_extractor/extraction.rb
         | 
| 23 | 
            -
            - lib/text_extractor/value.rb
         | 
| 24 | 
            -
            - lib/text_extractor/record.rb
         | 
| 25 63 | 
             
            - lib/text_extractor/filldown.rb
         | 
| 64 | 
            +
            - lib/text_extractor/record.rb
         | 
| 65 | 
            +
            - lib/text_extractor/value.rb
         | 
| 66 | 
            +
            - lib/text_extractor/version.rb
         | 
| 26 67 | 
             
            homepage: https://github.com/bjmllr/text_extractor
         | 
| 27 68 | 
             
            licenses:
         | 
| 28 | 
            -
            -  | 
| 69 | 
            +
            - GPL-3.0
         | 
| 70 | 
            +
            metadata: {}
         | 
| 29 71 | 
             
            post_install_message: 
         | 
| 30 72 | 
             
            rdoc_options: []
         | 
| 31 73 | 
             
            require_paths:
         | 
| 32 74 | 
             
            - lib
         | 
| 33 75 | 
             
            required_ruby_version: !ruby/object:Gem::Requirement
         | 
| 34 | 
            -
              none: false
         | 
| 35 76 | 
             
              requirements:
         | 
| 36 | 
            -
              - -  | 
| 77 | 
            +
              - - ">="
         | 
| 37 78 | 
             
                - !ruby/object:Gem::Version
         | 
| 38 | 
            -
                  version: 2. | 
| 79 | 
            +
                  version: 2.1.0
         | 
| 39 80 | 
             
            required_rubygems_version: !ruby/object:Gem::Requirement
         | 
| 40 | 
            -
              none: false
         | 
| 41 81 | 
             
              requirements:
         | 
| 42 | 
            -
              - -  | 
| 82 | 
            +
              - - ">="
         | 
| 43 83 | 
             
                - !ruby/object:Gem::Version
         | 
| 44 84 | 
             
                  version: '0'
         | 
| 45 85 | 
             
            requirements: []
         | 
| 46 86 | 
             
            rubyforge_project: 
         | 
| 47 | 
            -
            rubygems_version:  | 
| 87 | 
            +
            rubygems_version: 2.5.1
         | 
| 48 88 | 
             
            signing_key: 
         | 
| 49 | 
            -
            specification_version:  | 
| 89 | 
            +
            specification_version: 4
         | 
| 50 90 | 
             
            summary: Easily extract data from text
         | 
| 51 91 | 
             
            test_files: []
         | 
| 92 | 
            +
            has_rdoc: 
         |