wapiti 1.0.3 → 1.0.4
This diff shows the changes between publicly available package versions that have been released to one of the supported registries, as they appear in their respective public registries. It is provided for informational purposes only.
- checksums.yaml +4 -4
- data/lib/wapiti/dataset.rb +21 -21
- data/lib/wapiti/sequence.rb +8 -8
- data/lib/wapiti/token.rb +3 -3
- data/lib/wapiti/version.rb +1 -1
- metadata +2 -2
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: eefd2c624bb02b635f41b9577d303abc352783d2530ef42f3db7f91db2384174
         | 
| 4 | 
            +
              data.tar.gz: 73c766d6e05599b5167743dfc53daf20f74db0af0fe22d3ea6a947e9882189ab
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: 012c48b99ce4d6af1223f97fd03308f5af0c833e4db250842546b7f75de830fa700f71d52043fdd6c2b54f1f24cc83c0a0929211bf23153113d6efadf619fb68
         | 
| 7 | 
            +
              data.tar.gz: 1a78e6de9025f6f6e199ff70e0973a7aeb42beda218220f3c63801a8239cd5371bf4546796db82134646d4491fa9cf00072371cfe058a4632ab3946b621a32bd
         | 
    
        data/lib/wapiti/dataset.rb
    CHANGED
    
    | @@ -12,7 +12,7 @@ module Wapiti | |
| 12 12 | 
             
                def_delegators :sequences, :[], :empty?, :length, :size, :slice!, :uniq!
         | 
| 13 13 |  | 
| 14 14 | 
             
                class << self
         | 
| 15 | 
            -
                  def parse(dataset, separator: /(?:\r?\n){2,}/, ** | 
| 15 | 
            +
                  def parse(dataset, separator: /(?:\r?\n){2,}/, **opts)
         | 
| 16 16 | 
             
                    case dataset
         | 
| 17 17 | 
             
                    when Array
         | 
| 18 18 | 
             
                      new(dataset.map { |seq|
         | 
| @@ -23,12 +23,12 @@ module Wapiti | |
| 23 23 | 
             
                      })
         | 
| 24 24 | 
             
                    when String
         | 
| 25 25 | 
             
                      new(dataset.split(separator).map { |seq|
         | 
| 26 | 
            -
                        Sequence.parse(seq, ** | 
| 26 | 
            +
                        Sequence.parse(seq, **opts)
         | 
| 27 27 | 
             
                      }.reject(&:empty?))
         | 
| 28 28 | 
             
                    when REXML::Document
         | 
| 29 29 | 
             
                      new(dataset.elements.to_a('dataset/sequence').map { |seq|
         | 
| 30 30 | 
             
                        Sequence.new(seq.elements.to_a.map { |sgm|
         | 
| 31 | 
            -
                          sgm.text.strip.split( | 
| 31 | 
            +
                          sgm.text.strip.split(opts[:spacer] || /\s+/).map { |tk|
         | 
| 32 32 | 
             
                            Token.new tk, label: sgm.name
         | 
| 33 33 | 
             
                          }
         | 
| 34 34 | 
             
                        }.flatten)
         | 
| @@ -38,16 +38,16 @@ module Wapiti | |
| 38 38 | 
             
                    end
         | 
| 39 39 | 
             
                  end
         | 
| 40 40 |  | 
| 41 | 
            -
                  def open(path, format: File.extname(path), ** | 
| 41 | 
            +
                  def open(path, format: File.extname(path), **opts)
         | 
| 42 42 | 
             
                    raise ArgumentError,
         | 
| 43 43 | 
             
                      "cannot open dataset from tainted path: '#{path}'" if path.tainted?
         | 
| 44 44 |  | 
| 45 45 | 
             
                    input = File.read(path, encoding: 'utf-8')
         | 
| 46 46 | 
             
                    case format.downcase
         | 
| 47 47 | 
             
                    when '.xml', 'xml'
         | 
| 48 | 
            -
                      parse(REXML::Document.new(input), ** | 
| 48 | 
            +
                      parse(REXML::Document.new(input), **opts)
         | 
| 49 49 | 
             
                    else
         | 
| 50 | 
            -
                      parse(input, ** | 
| 50 | 
            +
                      parse(input, **opts)
         | 
| 51 51 | 
             
                    end
         | 
| 52 52 | 
             
                  end
         | 
| 53 53 | 
             
                end
         | 
| @@ -86,8 +86,8 @@ module Wapiti | |
| 86 86 | 
             
                  self
         | 
| 87 87 | 
             
                end
         | 
| 88 88 |  | 
| 89 | 
            -
                def sample(n = 1, ** | 
| 90 | 
            -
                  Dataset.new sequences.sample(n, ** | 
| 89 | 
            +
                def sample(n = 1, **opts)
         | 
| 90 | 
            +
                  Dataset.new sequences.sample(n, **opts)
         | 
| 91 91 | 
             
                end
         | 
| 92 92 |  | 
| 93 93 | 
             
                def slice(start, length = 1)
         | 
| @@ -114,20 +114,20 @@ module Wapiti | |
| 114 114 | 
             
                  Dataset.new(sequences & other.sequences)
         | 
| 115 115 | 
             
                end
         | 
| 116 116 |  | 
| 117 | 
            -
                def to_s(separator: "\n\n", ** | 
| 118 | 
            -
                  map { |sq| sq.to_s(** | 
| 117 | 
            +
                def to_s(separator: "\n\n", **opts)
         | 
| 118 | 
            +
                  map { |sq| sq.to_s(**opts) }.join(separator)
         | 
| 119 119 | 
             
                end
         | 
| 120 120 |  | 
| 121 | 
            -
                def to_txt(separator: "\n", ** | 
| 122 | 
            -
                  map { |sq| sq.to_sentence(** | 
| 121 | 
            +
                def to_txt(separator: "\n", **opts)
         | 
| 122 | 
            +
                  map { |sq| sq.to_sentence(**opts) }.join(separator)
         | 
| 123 123 | 
             
                end
         | 
| 124 124 |  | 
| 125 | 
            -
                def to_a(** | 
| 126 | 
            -
                  map { |sq| sq.to_a(** | 
| 125 | 
            +
                def to_a(**opts)
         | 
| 126 | 
            +
                  map { |sq| sq.to_a(**opts) }
         | 
| 127 127 | 
             
                end
         | 
| 128 128 |  | 
| 129 | 
            -
                def to_xml(** | 
| 130 | 
            -
                  xml = Builder::XmlMarkup.new(** | 
| 129 | 
            +
                def to_xml(**opts)
         | 
| 130 | 
            +
                  xml = Builder::XmlMarkup.new(**opts)
         | 
| 131 131 | 
             
                  xml.instruct!
         | 
| 132 132 | 
             
                  xml.dataset do |ds|
         | 
| 133 133 | 
             
                    each do |seq|
         | 
| @@ -136,19 +136,19 @@ module Wapiti | |
| 136 136 | 
             
                  end
         | 
| 137 137 | 
             
                end
         | 
| 138 138 |  | 
| 139 | 
            -
                def to_yml(** | 
| 140 | 
            -
                  map { |sq| sq.to_h(** | 
| 139 | 
            +
                def to_yml(**opts)
         | 
| 140 | 
            +
                  map { |sq| sq.to_h(**opts) }
         | 
| 141 141 | 
             
                end
         | 
| 142 142 |  | 
| 143 | 
            -
                def save(path, format: File.extname(path), ** | 
| 143 | 
            +
                def save(path, format: File.extname(path), **opts)
         | 
| 144 144 | 
             
                  raise ArgumentError,
         | 
| 145 145 | 
             
                    "cannot write dataset to tainted path: '#{path}'" if path.tainted?
         | 
| 146 146 |  | 
| 147 147 | 
             
                  output = case format.downcase
         | 
| 148 148 | 
             
                    when '.txt', 'txt'
         | 
| 149 | 
            -
                      to_s(** | 
| 149 | 
            +
                      to_s(**opts)
         | 
| 150 150 | 
             
                    when '.xml', 'xml'
         | 
| 151 | 
            -
                      to_xml(** | 
| 151 | 
            +
                      to_xml(**opts)
         | 
| 152 152 | 
             
                    else
         | 
| 153 153 | 
             
                      raise ArgumentError, "unknown format: '#{format}'"
         | 
| 154 154 | 
             
                    end
         | 
    
        data/lib/wapiti/sequence.rb
    CHANGED
    
    | @@ -11,9 +11,9 @@ module Wapiti | |
| 11 11 | 
             
                def_delegators :tokens, :[], :empty?, :size
         | 
| 12 12 |  | 
| 13 13 | 
             
                class << self
         | 
| 14 | 
            -
                  def parse(string, delimiter: /\r?\n/, ** | 
| 14 | 
            +
                  def parse(string, delimiter: /\r?\n/, **opts)
         | 
| 15 15 | 
             
                    new(string.split(delimiter).map { |token|
         | 
| 16 | 
            -
                      Token.parse token, ** | 
| 16 | 
            +
                      Token.parse token, **opts
         | 
| 17 17 | 
             
                    }.reject(&:empty?))
         | 
| 18 18 | 
             
                  end
         | 
| 19 19 | 
             
                end
         | 
| @@ -82,20 +82,20 @@ module Wapiti | |
| 82 82 | 
             
                  end
         | 
| 83 83 | 
             
                end
         | 
| 84 84 |  | 
| 85 | 
            -
                def to_a(** | 
| 86 | 
            -
                  tokens.map { |tk| tk.to_s(** | 
| 85 | 
            +
                def to_a(**opts)
         | 
| 86 | 
            +
                  tokens.map { |tk| tk.to_s(**opts) }
         | 
| 87 87 | 
             
                end
         | 
| 88 88 |  | 
| 89 | 
            -
                def to_s(delimiter: "\n", ** | 
| 90 | 
            -
                  tokens.map { |tk| tk.to_s(** | 
| 89 | 
            +
                def to_s(delimiter: "\n", **opts)
         | 
| 90 | 
            +
                  tokens.map { |tk| tk.to_s(**opts) }.join(delimiter)
         | 
| 91 91 | 
             
                end
         | 
| 92 92 |  | 
| 93 93 | 
             
                def to_sentence(delimiter: ' ')
         | 
| 94 94 | 
             
                  to_s(delimiter: delimiter, expanded: false, tagged: false)
         | 
| 95 95 | 
             
                end
         | 
| 96 96 |  | 
| 97 | 
            -
                def to_h(symbolize_keys: false, ** | 
| 98 | 
            -
                  each_segment(** | 
| 97 | 
            +
                def to_h(symbolize_keys: false, **opts)
         | 
| 98 | 
            +
                  each_segment(**opts).reduce({}) do |h, (label, segment)|
         | 
| 99 99 | 
             
                    label = label.intern if symbolize_keys
         | 
| 100 100 | 
             
                    h[label] = [] unless h.key? label
         | 
| 101 101 | 
             
                    h[label] << segment
         | 
    
        data/lib/wapiti/token.rb
    CHANGED
    
    | @@ -7,7 +7,7 @@ module Wapiti | |
| 7 7 | 
             
                attr_accessor :value, :label, :observations, :score
         | 
| 8 8 |  | 
| 9 9 | 
             
                class << self
         | 
| 10 | 
            -
                  def parse(string, spacer: /\s+/, tagged: false)
         | 
| 10 | 
            +
                  def parse(string, spacer: /\s+/, tagged: false, **opts)
         | 
| 11 11 | 
             
                    value, *observations = string.split(spacer)
         | 
| 12 12 | 
             
                    new(value, {
         | 
| 13 13 | 
             
                      label: (tagged ? observations.pop : nil).to_s,
         | 
| @@ -56,8 +56,8 @@ module Wapiti | |
| 56 56 | 
             
                  end
         | 
| 57 57 | 
             
                end
         | 
| 58 58 |  | 
| 59 | 
            -
                def to_s(spacer: ' ', ** | 
| 60 | 
            -
                  to_a(** | 
| 59 | 
            +
                def to_s(spacer: ' ', **opts)
         | 
| 60 | 
            +
                  to_a(**opts).join(spacer)
         | 
| 61 61 | 
             
                end
         | 
| 62 62 |  | 
| 63 63 | 
             
                def to_a(expanded: true, tagged: true, encode: false)
         | 
    
        data/lib/wapiti/version.rb
    CHANGED
    
    
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: wapiti
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 1.0.3 | 
| 4 | 
            +
              version: 1.0.4
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Sylvester Keil
         | 
| 8 8 | 
             
            autorequire: 
         | 
| 9 9 | 
             
            bindir: bin
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date: 2018- | 
| 11 | 
            +
            date: 2018-09-14 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies:
         | 
| 13 13 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 14 14 | 
             
              name: builder
         |