opener-property-tagger 3.0.5 → 3.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
 - data/lib/opener/property_tagger.rb +27 -21
 - data/lib/opener/property_tagger/aspects_cache.rb +47 -0
 - data/lib/opener/property_tagger/cli.rb +4 -1
 - data/lib/opener/property_tagger/processor.rb +50 -44
 - data/lib/opener/property_tagger/remote_aspects_cache.rb +40 -0
 - data/lib/opener/property_tagger/version.rb +5 -3
 - data/opener-property-tagger.gemspec +4 -1
 - data/task/test.rake +2 -1
 - metadata +58 -9
 
    
        checksums.yaml
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            ---
         
     | 
| 
       2 
     | 
    
         
            -
             
     | 
| 
       3 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       4 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 2 
     | 
    
         
            +
            SHA256:
         
     | 
| 
      
 3 
     | 
    
         
            +
              metadata.gz: 168a0501dc6567285dd70b1a304fdc885b6d9493f9ca62863a428068b42d284e
         
     | 
| 
      
 4 
     | 
    
         
            +
              data.tar.gz: 6e9dd2b446eca6e75d4644e5cb59ec8ad64ec6f7f88e79266142129ce5ee59eb
         
     | 
| 
       5 
5 
     | 
    
         
             
            SHA512:
         
     | 
| 
       6 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       7 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 6 
     | 
    
         
            +
              metadata.gz: 7a209b8da679c06ea5fce64a11142b22b8b7a3c4da2775f832f4266af152c624f8ddb9e81bf7cbb96d668babc303fedb1243988b243faa2c72a5b7438c47c9b6
         
     | 
| 
      
 7 
     | 
    
         
            +
              data.tar.gz: 621b9a11a6c2230aee345fc4d072b8636d1c63b6533960103de475355a54baac7924867eb59176850d7b0aaaccdbc1a4a6eccf9af62652bbaf6f26097163778d
         
     | 
| 
         @@ -1,12 +1,18 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            require 'open3'
         
     | 
| 
       2 
2 
     | 
    
         
             
            require 'slop'
         
     | 
| 
       3 
3 
     | 
    
         
             
            require 'oga'
         
     | 
| 
      
 4 
     | 
    
         
            +
            require 'monitor'
         
     | 
| 
      
 5 
     | 
    
         
            +
            require 'httpclient'
         
     | 
| 
      
 6 
     | 
    
         
            +
            require 'hashie'
         
     | 
| 
      
 7 
     | 
    
         
            +
            require 'json'
         
     | 
| 
       4 
8 
     | 
    
         | 
| 
       5 
9 
     | 
    
         
             
            require 'rexml/document'
         
     | 
| 
       6 
10 
     | 
    
         
             
            require 'rexml/formatters/pretty'
         
     | 
| 
       7 
11 
     | 
    
         | 
| 
       8 
12 
     | 
    
         
             
            require_relative 'property_tagger/version'
         
     | 
| 
       9 
13 
     | 
    
         
             
            require_relative 'property_tagger/cli'
         
     | 
| 
      
 14 
     | 
    
         
            +
            require_relative 'property_tagger/aspects_cache'
         
     | 
| 
      
 15 
     | 
    
         
            +
            require_relative 'property_tagger/remote_aspects_cache'
         
     | 
| 
       10 
16 
     | 
    
         
             
            require_relative 'property_tagger/processor'
         
     | 
| 
       11 
17 
     | 
    
         | 
| 
       12 
18 
     | 
    
         
             
            module Opener
         
     | 
| 
         @@ -41,36 +47,36 @@ module Opener 
     | 
|
| 
       41 
47 
     | 
    
         
             
                # @return [String]
         
     | 
| 
       42 
48 
     | 
    
         
             
                #
         
     | 
| 
       43 
49 
     | 
    
         
             
                def path
         
     | 
| 
       44 
     | 
    
         
            -
                  path  
     | 
| 
      
 50 
     | 
    
         
            +
                  return @path if @path
         
     | 
| 
      
 51 
     | 
    
         
            +
             
     | 
| 
      
 52 
     | 
    
         
            +
                  @path = options[:resource_path] || ENV['RESOURCE_PATH'] ||
         
     | 
| 
       45 
53 
     | 
    
         
             
                    ENV['PROPERTY_TAGGER_LEXICONS_PATH']
         
     | 
| 
      
 54 
     | 
    
         
            +
                  return unless @path
         
     | 
| 
       46 
55 
     | 
    
         | 
| 
       47 
     | 
    
         
            -
                   
     | 
| 
       48 
     | 
    
         
            -
             
     | 
| 
       49 
     | 
    
         
            -
                  end
         
     | 
| 
      
 56 
     | 
    
         
            +
                  @path = File.expand_path @path
         
     | 
| 
      
 57 
     | 
    
         
            +
                end
         
     | 
| 
       50 
58 
     | 
    
         | 
| 
       51 
     | 
    
         
            -
             
     | 
| 
      
 59 
     | 
    
         
            +
                def remote_url
         
     | 
| 
      
 60 
     | 
    
         
            +
                  @remote_url ||= ENV['PROPERTY_TAGGER_LEXICONS_URL']
         
     | 
| 
       52 
61 
     | 
    
         
             
                end
         
     | 
| 
       53 
62 
     | 
    
         | 
| 
       54 
63 
     | 
    
         
             
                ##
         
     | 
| 
       55 
     | 
    
         
            -
                # Processes the input  
     | 
| 
       56 
     | 
    
         
            -
                # STDERR and an object containing process information.
         
     | 
| 
      
 64 
     | 
    
         
            +
                # Processes the input KAF document.
         
     | 
| 
       57 
65 
     | 
    
         
             
                #
         
     | 
| 
       58 
     | 
    
         
            -
                # @param [String] input 
     | 
| 
       59 
     | 
    
         
            -
                # @return [ 
     | 
| 
      
 66 
     | 
    
         
            +
                # @param [String] input
         
     | 
| 
      
 67 
     | 
    
         
            +
                # @return [String]
         
     | 
| 
       60 
68 
     | 
    
         
             
                #
         
     | 
| 
       61 
     | 
    
         
            -
                def run 
     | 
| 
       62 
     | 
    
         
            -
                   
     | 
| 
      
 69 
     | 
    
         
            +
                def run input
         
     | 
| 
      
 70 
     | 
    
         
            +
                  timestamp = !options[:no_time]
         
     | 
| 
       63 
71 
     | 
    
         | 
| 
       64 
     | 
    
         
            -
                   
     | 
| 
      
 72 
     | 
    
         
            +
                  Processor.new(input,
         
     | 
| 
      
 73 
     | 
    
         
            +
                    url:       remote_url,
         
     | 
| 
      
 74 
     | 
    
         
            +
                    path:      path,
         
     | 
| 
      
 75 
     | 
    
         
            +
                    timestamp: timestamp,
         
     | 
| 
      
 76 
     | 
    
         
            +
                    pretty:    options[:pretty],
         
     | 
| 
      
 77 
     | 
    
         
            +
                  ).process
         
     | 
| 
       65 
78 
     | 
    
         
             
                end
         
     | 
| 
       66 
79 
     | 
    
         | 
| 
       67 
     | 
    
         
            -
             
     | 
| 
       68 
     | 
    
         
            -
             
     | 
| 
       69 
     | 
    
         
            -
                def process(input)
         
     | 
| 
       70 
     | 
    
         
            -
                  processor = Processor.new(input, path, !options[:no_time])
         
     | 
| 
       71 
     | 
    
         
            -
             
     | 
| 
       72 
     | 
    
         
            -
                  return processor.process
         
     | 
| 
       73 
     | 
    
         
            -
                end
         
     | 
| 
       74 
     | 
    
         
            -
              end # PolarityTagger
         
     | 
| 
       75 
     | 
    
         
            -
            end # Opener
         
     | 
| 
      
 80 
     | 
    
         
            +
              end
         
     | 
| 
      
 81 
     | 
    
         
            +
            end
         
     | 
| 
       76 
82 
     | 
    
         | 
| 
         @@ -0,0 +1,47 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            module Opener
              class PropertyTagger
                ##
                # Thread-safe cache for storing the contents of aspect files.
                #
                # Every file is parsed at most once per cache instance; later lookups
                # for the same path return the very same Hash object.
                #
                class AspectsCache
                  include MonitorMixin

                  def initialize
                    # MonitorMixin sets up its lock in #initialize, so `super` is required
                    # before #synchronize can be used.
                    super

                    @cache = {}
                  end

                  ##
                  # Returns the aspects for the given file path. If the aspects don't exist
                  # they are first loaded into the cache.
                  #
                  # @param [String] path
                  # @return [Hash] lemma (Symbol) => Array of aspect names
                  #
                  def [](path)
                    synchronize do
                      @cache[path] = load_aspects(path) unless @cache.key?(path)

                      # Read the entry back explicitly: the modifier-`unless` above
                      # evaluates to nil on a cache hit, which used to make every lookup
                      # after the first one return nil.
                      @cache[path]
                    end
                  end

                  alias_method :get, :[]

                  ##
                  # Loads the aspects of the given path.
                  #
                  # Each line is expected to hold three tab-separated fields: lemma, POS
                  # and aspect. The POS field is read but not used.
                  #
                  # @param [String] path
                  # @return [Hash] lemma (Symbol) => Array of aspect names
                  #
                  def load_aspects(path)
                    mapping = Hash.new { |hash, key| hash[key] = [] }

                    File.foreach(path) do |line|
                      lemma, _pos, aspect = line.chomp.split("\t")

                      # A blank line splits into an empty Array, leaving lemma nil; skip
                      # it instead of raising NoMethodError on nil.to_sym.
                      next unless lemma

                      mapping[lemma.to_sym] << aspect
                    end

                    return mapping
                  end
                end # AspectsCache
              end # PropertyTagger
            end # Opener
         
     | 
| 
         @@ -56,10 +56,13 @@ Examples: 
     | 
|
| 
       56 
56 
     | 
    
         | 
| 
       57 
57 
     | 
    
         
             
                      on :'no-time', 'Disables adding of timestamps'
         
     | 
| 
       58 
58 
     | 
    
         | 
| 
      
 59 
     | 
    
         
            +
                      on :ugly, 'Disables pretty formatting of XML (faster)'
         
     | 
| 
      
 60 
     | 
    
         
            +
             
     | 
| 
       59 
61 
     | 
    
         
             
                      run do |opts, args|
         
     | 
| 
       60 
62 
     | 
    
         
             
                        tagger = PropertyTagger.new(
         
     | 
| 
       61 
63 
     | 
    
         
             
                          :args    => args,
         
     | 
| 
       62 
     | 
    
         
            -
                          :no_time => opts[:'no-time']
         
     | 
| 
      
 64 
     | 
    
         
            +
                          :no_time => opts[:'no-time'],
         
     | 
| 
      
 65 
     | 
    
         
            +
                          :pretty  => !opts[:ugly]
         
     | 
| 
       63 
66 
     | 
    
         
             
                        )
         
     | 
| 
       64 
67 
     | 
    
         | 
| 
       65 
68 
     | 
    
         
             
                        input  = STDIN.tty? ? nil : STDIN.read
         
     | 
| 
         @@ -4,15 +4,37 @@ module Opener 
     | 
|
| 
       4 
4 
     | 
    
         
             
                # Class that applies property tagging to a given input KAF file.
         
     | 
| 
       5 
5 
     | 
    
         
             
                #
         
     | 
| 
       6 
6 
     | 
    
         
             
                class Processor
         
     | 
| 
       7 
     | 
    
         
            -
                  attr_accessor :document, :aspects_path, :language, :aspects, :terms,
         
     | 
| 
       8 
     | 
    
         
            -
                    :timestamp
         
     | 
| 
       9 
7 
     | 
    
         | 
| 
       10 
     | 
    
         
            -
                   
     | 
| 
       11 
     | 
    
         
            -
             
     | 
| 
       12 
     | 
    
         
            -
             
     | 
| 
       13 
     | 
    
         
            -
                    @timestamp    = timestamp
         
     | 
| 
      
 8 
     | 
    
         
            +
                  attr_accessor :document
         
     | 
| 
      
 9 
     | 
    
         
            +
                  attr_accessor :aspects, :aspects_path, :aspects_url
         
     | 
| 
      
 10 
     | 
    
         
            +
                  attr_accessor :timestamp, :pretty
         
     | 
| 
       14 
11 
     | 
    
         | 
| 
      
 12 
     | 
    
         
            +
                  ##
         
     | 
| 
      
 13 
     | 
    
         
            +
                  # Global cache used for storing loaded aspects.
         
     | 
| 
      
 14 
     | 
    
         
            +
                  #
         
     | 
| 
      
 15 
     | 
    
         
            +
                  # @return [Opener::PropertyTagger::AspectsCache]
         
     | 
| 
      
 16 
     | 
    
         
            +
                  #
         
     | 
| 
      
 17 
     | 
    
         
            +
                  ASPECTS_CACHE        = AspectsCache.new
         
     | 
| 
      
 18 
     | 
    
         
            +
                  REMOTE_ASPECTS_CACHE = RemoteAspectsCache.new
         
     | 
| 
      
 19 
     | 
    
         
            +
             
     | 
| 
      
 20 
     | 
    
         
            +
                  ##
         
     | 
| 
      
 21 
     | 
    
         
            +
                  # @param [String|IO] file The KAF file/input to process.
         
     | 
| 
      
 22 
     | 
    
         
            +
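                  # @param [String] url Remote lexicons URL; when non-nil the aspects are read from the remote cache instead of +path+.
                  # @param [String] path Directory containing the per-language aspects files (<language>.txt).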
         
     | 
| 
      
 23 
     | 
    
         
            +
                  # @param [TrueClass|FalseClass] timestamp Add timestamps to the KAF.
         
     | 
| 
      
 24 
     | 
    
         
            +
                  # @param [TrueClass|FalseClass] pretty Enable pretty formatting, disabled
         
     | 
| 
      
 25 
     | 
    
         
            +
                  #  by default due to the performance overhead.
         
     | 
| 
      
 26 
     | 
    
         
            +
                  #
         
     | 
| 
      
 27 
     | 
    
         
            +
                  def initialize file, url: nil, path: nil, timestamp: true, pretty: false
         
     | 
| 
      
 28 
     | 
    
         
            +
                    @document     = Oga.parse_xml file
         
     | 
| 
       15 
29 
     | 
    
         
             
                    raise 'Error parsing input. Input is required to be KAF' unless is_kaf?
         
     | 
| 
      
 30 
     | 
    
         
            +
                    @timestamp    = timestamp
         
     | 
| 
      
 31 
     | 
    
         
            +
                    @pretty       = pretty
         
     | 
| 
      
 32 
     | 
    
         
            +
             
     | 
| 
      
 33 
     | 
    
         
            +
                    @remote       = !url.nil?
         
     | 
| 
      
 34 
     | 
    
         
            +
                    @aspects_path = path
         
     | 
| 
      
 35 
     | 
    
         
            +
                    @aspects_url  = url
         
     | 
| 
      
 36 
     | 
    
         
            +
             
     | 
| 
      
 37 
     | 
    
         
            +
                    @aspects = if @remote then REMOTE_ASPECTS_CACHE[language] else ASPECTS_CACHE[aspects_file] end
         
     | 
| 
       16 
38 
     | 
    
         
             
                  end
         
     | 
| 
       17 
39 
     | 
    
         | 
| 
       18 
40 
     | 
    
         
             
                  ##
         
     | 
| 
         @@ -20,64 +42,45 @@ module Opener 
     | 
|
| 
       20 
42 
     | 
    
         
             
                  # @return [String]
         
     | 
| 
       21 
43 
     | 
    
         
             
                  #
         
     | 
| 
       22 
44 
     | 
    
         
             
                  def process
         
     | 
| 
       23 
     | 
    
         
            -
                    @language = get_language
         
     | 
| 
       24 
     | 
    
         
            -
                    @aspects  = load_aspects
         
     | 
| 
       25 
     | 
    
         
            -
                    @terms    = get_terms
         
     | 
| 
       26 
     | 
    
         
            -
             
     | 
| 
       27 
45 
     | 
    
         
             
                    existing_aspects = extract_aspects
         
     | 
| 
       28 
46 
     | 
    
         | 
| 
       29 
47 
     | 
    
         
             
                    add_features_layer
         
     | 
| 
       30 
48 
     | 
    
         
             
                    add_properties_layer
         
     | 
| 
       31 
49 
     | 
    
         | 
| 
       32 
     | 
    
         
            -
                     
     | 
| 
      
 50 
     | 
    
         
            +
                    existing_aspects.each_with_index do |(key, value), index|
         
     | 
| 
      
 51 
     | 
    
         
            +
                      index += 1
         
     | 
| 
       33 
52 
     | 
    
         | 
| 
       34 
     | 
    
         
            -
                    existing_aspects.each_pair do |key,value|
         
     | 
| 
       35 
53 
     | 
    
         
             
                      add_property(key, value, index)
         
     | 
| 
       36 
     | 
    
         
            -
                      index += 1
         
     | 
| 
       37 
54 
     | 
    
         
             
                    end
         
     | 
| 
       38 
55 
     | 
    
         | 
| 
       39 
56 
     | 
    
         
             
                    add_linguistic_processor
         
     | 
| 
       40 
57 
     | 
    
         | 
| 
       41 
     | 
    
         
            -
                    return pretty_print(document)
         
     | 
| 
       42 
     | 
    
         
            -
                  end
         
     | 
| 
       43 
     | 
    
         
            -
             
     | 
| 
       44 
     | 
    
         
            -
                  ##
         
     | 
| 
       45 
     | 
    
         
            -
                  # Loads the aspects from the txt file
         
     | 
| 
       46 
     | 
    
         
            -
                  # @return [Hash]
         
     | 
| 
       47 
     | 
    
         
            -
                  #
         
     | 
| 
       48 
     | 
    
         
            -
                  def load_aspects
         
     | 
| 
       49 
     | 
    
         
            -
                    aspects_hash = {}
         
     | 
| 
       50 
     | 
    
         
            -
             
     | 
| 
       51 
     | 
    
         
            -
                    File.foreach(aspects_file) do |line|
         
     | 
| 
       52 
     | 
    
         
            -
                      lemma, pos, aspect = line.gsub("\n", "").split("\t")
         
     | 
| 
       53 
     | 
    
         
            -
             
     | 
| 
       54 
     | 
    
         
            -
                      aspects_hash[lemma.to_sym] = [] unless aspects_hash[lemma.to_sym]
         
     | 
| 
       55 
     | 
    
         
            -
                      aspects_hash[lemma.to_sym] << aspect
         
     | 
| 
       56 
     | 
    
         
            -
                    end
         
     | 
| 
       57 
     | 
    
         
            -
             
     | 
| 
       58 
     | 
    
         
            -
                    return aspects_hash
         
     | 
| 
      
 58 
     | 
    
         
            +
                    return pretty ? pretty_print(document) : document.to_xml
         
     | 
| 
       59 
59 
     | 
    
         
             
                  end
         
     | 
| 
       60 
60 
     | 
    
         | 
| 
       61 
61 
     | 
    
         
             
                  ##
         
     | 
| 
       62 
62 
     | 
    
         
             
                  # Get the language of the input file.
         
     | 
| 
      
 63 
     | 
    
         
            +
                  #
         
     | 
| 
       63 
64 
     | 
    
         
             
                  # @return [String]
         
     | 
| 
       64 
65 
     | 
    
         
             
                  #
         
     | 
| 
       65 
     | 
    
         
            -
                  def  
     | 
| 
       66 
     | 
    
         
            -
                    document.at_xpath('KAF').get('xml:lang')
         
     | 
| 
      
 66 
     | 
    
         
            +
                  def language
         
     | 
| 
      
 67 
     | 
    
         
            +
                    return @language ||= document.at_xpath('KAF').get('xml:lang')
         
     | 
| 
       67 
68 
     | 
    
         
             
                  end
         
     | 
| 
       68 
69 
     | 
    
         | 
| 
       69 
70 
     | 
    
         
             
                  ##
         
     | 
| 
       70 
71 
     | 
    
         
             
                  # Get the terms from the input file
         
     | 
| 
       71 
72 
     | 
    
         
             
                  # @return [Hash]
         
     | 
| 
       72 
73 
     | 
    
         
             
                  #
         
     | 
| 
       73 
     | 
    
         
            -
                  def  
     | 
| 
       74 
     | 
    
         
            -
                     
     | 
| 
      
 74 
     | 
    
         
            +
                  def terms
         
     | 
| 
      
 75 
     | 
    
         
            +
                    unless @terms
         
     | 
| 
      
 76 
     | 
    
         
            +
                      @terms = {}
         
     | 
| 
       75 
77 
     | 
    
         | 
| 
       76 
     | 
    
         
            -
             
     | 
| 
       77 
     | 
    
         
            -
             
     | 
| 
      
 78 
     | 
    
         
            +
                      document.xpath('KAF/terms/term').each do |term|
         
     | 
| 
      
 79 
     | 
    
         
            +
                        @terms[term.get('tid').to_sym] = term.get('lemma')
         
     | 
| 
      
 80 
     | 
    
         
            +
                      end
         
     | 
| 
       78 
81 
     | 
    
         
             
                    end
         
     | 
| 
       79 
82 
     | 
    
         | 
| 
       80 
     | 
    
         
            -
                    return  
     | 
| 
      
 83 
     | 
    
         
            +
                    return @terms
         
     | 
| 
       81 
84 
     | 
    
         
             
                  end
         
     | 
| 
       82 
85 
     | 
    
         | 
| 
       83 
86 
     | 
    
         
             
                  ##
         
     | 
| 
         @@ -93,7 +96,7 @@ module Opener 
     | 
|
| 
       93 
96 
     | 
    
         
             
                    # lemmas) belong to a property.
         
     | 
| 
       94 
97 
     | 
    
         
             
                    max_ngram = 2
         
     | 
| 
       95 
98 
     | 
    
         | 
| 
       96 
     | 
    
         
            -
                    uniq_aspects = {}
         
     | 
| 
      
 99 
     | 
    
         
            +
                    uniq_aspects = Hash.new { |hash, key| hash[key] = [] }
         
     | 
| 
       97 
100 
     | 
    
         | 
| 
       98 
101 
     | 
    
         
             
                    while current_token < terms.count
         
     | 
| 
       99 
102 
     | 
    
         
             
                      (0..max_ngram).each do |tam_ngram|
         
     | 
| 
         @@ -107,7 +110,6 @@ module Opener 
     | 
|
| 
       107 
110 
     | 
    
         
             
                            properties.uniq.each do |property|
         
     | 
| 
       108 
111 
     | 
    
         
             
                              next if !property or property.strip.empty?
         
     | 
| 
       109 
112 
     | 
    
         | 
| 
       110 
     | 
    
         
            -
                              uniq_aspects[property.to_sym] = [] unless uniq_aspects[property.to_sym]
         
     | 
| 
       111 
113 
     | 
    
         
             
                              uniq_aspects[property.to_sym] << [ids,ngram]
         
     | 
| 
       112 
114 
     | 
    
         
             
                            end
         
     | 
| 
       113 
115 
     | 
    
         
             
                          end
         
     | 
| 
         @@ -223,9 +225,13 @@ module Opener 
     | 
|
| 
       223 
225 
     | 
    
         
             
                    return !!document.at_xpath('KAF')
         
     | 
| 
       224 
226 
     | 
    
         
             
                  end
         
     | 
| 
       225 
227 
     | 
    
         | 
| 
      
 228 
     | 
    
         
            +
                  ##
         
     | 
| 
      
 229 
     | 
    
         
            +
                  # @return [String]
         
     | 
| 
      
 230 
     | 
    
         
            +
                  #
         
     | 
| 
       226 
231 
     | 
    
         
             
                  def aspects_file
         
     | 
| 
       227 
     | 
    
         
            -
                     
     | 
| 
      
 232 
     | 
    
         
            +
                    @aspects_file ||= File.expand_path "#{aspects_path}/#{language}.txt", __FILE__
         
     | 
| 
       228 
233 
     | 
    
         
             
                  end
         
     | 
| 
       229 
     | 
    
         
            -
             
     | 
| 
       230 
     | 
    
         
            -
             
     | 
| 
       231 
     | 
    
         
            -
            end 
     | 
| 
      
 234 
     | 
    
         
            +
             
     | 
| 
      
 235 
     | 
    
         
            +
                end
         
     | 
| 
      
 236 
     | 
    
         
            +
              end
         
     | 
| 
      
 237 
     | 
    
         
            +
            end
         
     | 
| 
         @@ -0,0 +1,40 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            module Opener
         
     | 
| 
      
 2 
     | 
    
         
            +
              class PropertyTagger
         
     | 
| 
      
 3 
     | 
    
         
            +
                ##
         
     | 
| 
      
 4 
     | 
    
         
            +
                # Thread-safe cache for storing the contents of remote aspects.
         
     | 
| 
      
 5 
     | 
    
         
            +
                #
         
     | 
| 
      
 6 
     | 
    
         
            +
                class RemoteAspectsCache
         
     | 
| 
      
 7 
     | 
    
         
            +
             
     | 
| 
      
 8 
     | 
    
         
            +
                  include MonitorMixin
         
     | 
| 
      
 9 
     | 
    
         
            +
             
     | 
| 
      
 10 
     | 
    
         
            +
                  def initialize
         
     | 
| 
      
 11 
     | 
    
         
            +
                    super
         
     | 
| 
      
 12 
     | 
    
         
            +
             
     | 
| 
      
 13 
     | 
    
         
            +
                    @url   = ENV['PROPERTY_TAGGER_LEXICONS_URL']
         
     | 
| 
      
 14 
     | 
    
         
            +
                    @cache = {}
         
     | 
| 
      
 15 
     | 
    
         
            +
                  end
         
     | 
| 
      
 16 
     | 
    
         
            +
             
     | 
| 
      
 17 
     | 
    
         
            +
                  def [] lang
         
     | 
| 
      
 18 
     | 
    
         
            +
                    synchronize do
         
     | 
| 
      
 19 
     | 
    
         
            +
                      @cache[lang] ||= load_aspects lang
         
     | 
| 
      
 20 
     | 
    
         
            +
                    end
         
     | 
| 
      
 21 
     | 
    
         
            +
                  end
         
     | 
| 
      
 22 
     | 
    
         
            +
                  alias_method :get, :[]
         
     | 
| 
      
 23 
     | 
    
         
            +
             
     | 
| 
      
 24 
     | 
    
         
            +
                  def load_aspects lang
         
     | 
| 
      
 25 
     | 
    
         
            +
                    mapping  = Hash.new{ |hash, key| hash[key] = [] }
         
     | 
| 
      
 26 
     | 
    
         
            +
                    url      = "#{@url}&language_code=#{lang}"
         
     | 
| 
      
 27 
     | 
    
         
            +
                    lexicons = JSON.parse HTTPClient.new.get(url).body
         
     | 
| 
      
 28 
     | 
    
         
            +
                    lexicons = lexicons['data'].map{ |l| Hashie::Mash.new l }
         
     | 
| 
      
 29 
     | 
    
         
            +
                    puts "#{lang}: loaded aspects from #{url}"
         
     | 
| 
      
 30 
     | 
    
         
            +
             
     | 
| 
      
 31 
     | 
    
         
            +
                    lexicons.each do |l|
         
     | 
| 
      
 32 
     | 
    
         
            +
                      mapping[l.lemma.to_sym] << l.aspect
         
     | 
| 
      
 33 
     | 
    
         
            +
                    end
         
     | 
| 
      
 34 
     | 
    
         
            +
             
     | 
| 
      
 35 
     | 
    
         
            +
                    return mapping
         
     | 
| 
      
 36 
     | 
    
         
            +
                  end
         
     | 
| 
      
 37 
     | 
    
         
            +
             
     | 
| 
      
 38 
     | 
    
         
            +
                end
         
     | 
| 
      
 39 
     | 
    
         
            +
              end
         
     | 
| 
      
 40 
     | 
    
         
            +
            end
         
     | 
| 
         @@ -28,9 +28,12 @@ Gem::Specification.new do |gem| 
     | 
|
| 
       28 
28 
     | 
    
         
             
              gem.add_dependency 'opener-webservice', '~> 2.1'
         
     | 
| 
       29 
29 
     | 
    
         
             
              gem.add_dependency 'opener-core', '~> 2.2'
         
     | 
| 
       30 
30 
     | 
    
         | 
| 
       31 
     | 
    
         
            -
              gem.add_dependency 'oga'
         
     | 
| 
      
 31 
     | 
    
         
            +
              gem.add_dependency 'oga', ['~> 1.0', '>= 1.3.1']
         
     | 
| 
      
 32 
     | 
    
         
            +
              gem.add_dependency 'httpclient'
         
     | 
| 
      
 33 
     | 
    
         
            +
              gem.add_dependency 'hashie'
         
     | 
| 
       32 
34 
     | 
    
         | 
| 
       33 
35 
     | 
    
         
             
              gem.add_development_dependency 'rspec', '~> 3.0'
         
     | 
| 
       34 
36 
     | 
    
         
             
              gem.add_development_dependency 'cucumber'
         
     | 
| 
       35 
37 
     | 
    
         
             
              gem.add_development_dependency 'rake'
         
     | 
| 
      
 38 
     | 
    
         
            +
              gem.add_development_dependency 'benchmark-ips', '~> 2.0'
         
     | 
| 
       36 
39 
     | 
    
         
             
            end
         
     | 
    
        data/task/test.rake
    CHANGED
    
    
    
        metadata
    CHANGED
    
    | 
         @@ -1,14 +1,14 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            --- !ruby/object:Gem::Specification
         
     | 
| 
       2 
2 
     | 
    
         
             
            name: opener-property-tagger
         
     | 
| 
       3 
3 
     | 
    
         
             
            version: !ruby/object:Gem::Version
         
     | 
| 
       4 
     | 
    
         
            -
              version: 3.0.5
     | 
| 
      
 4 
     | 
    
         
            +
              version: 3.2.1
         
     | 
| 
       5 
5 
     | 
    
         
             
            platform: ruby
         
     | 
| 
       6 
6 
     | 
    
         
             
            authors:
         
     | 
| 
       7 
7 
     | 
    
         
             
            - development@olery.com
         
     | 
| 
       8 
     | 
    
         
            -
            autorequire: 
     | 
| 
      
 8 
     | 
    
         
            +
            autorequire:
         
     | 
| 
       9 
9 
     | 
    
         
             
            bindir: bin
         
     | 
| 
       10 
10 
     | 
    
         
             
            cert_chain: []
         
     | 
| 
       11 
     | 
    
         
            -
            date:  
     | 
| 
      
 11 
     | 
    
         
            +
            date: 2020-09-23 00:00:00.000000000 Z
         
     | 
| 
       12 
12 
     | 
    
         
             
            dependencies:
         
     | 
| 
       13 
13 
     | 
    
         
             
            - !ruby/object:Gem::Dependency
         
     | 
| 
       14 
14 
     | 
    
         
             
              name: opener-daemons
         
     | 
| 
         @@ -54,6 +54,40 @@ dependencies: 
     | 
|
| 
       54 
54 
     | 
    
         
             
                    version: '2.2'
         
     | 
| 
       55 
55 
     | 
    
         
             
            - !ruby/object:Gem::Dependency
         
     | 
| 
       56 
56 
     | 
    
         
             
              name: oga
         
     | 
| 
      
 57 
     | 
    
         
            +
              requirement: !ruby/object:Gem::Requirement
         
     | 
| 
      
 58 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 59 
     | 
    
         
            +
                - - "~>"
         
     | 
| 
      
 60 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 61 
     | 
    
         
            +
                    version: '1.0'
         
     | 
| 
      
 62 
     | 
    
         
            +
                - - ">="
         
     | 
| 
      
 63 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 64 
     | 
    
         
            +
                    version: 1.3.1
         
     | 
| 
      
 65 
     | 
    
         
            +
              type: :runtime
         
     | 
| 
      
 66 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 67 
     | 
    
         
            +
              version_requirements: !ruby/object:Gem::Requirement
         
     | 
| 
      
 68 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 69 
     | 
    
         
            +
                - - "~>"
         
     | 
| 
      
 70 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 71 
     | 
    
         
            +
                    version: '1.0'
         
     | 
| 
      
 72 
     | 
    
         
            +
                - - ">="
         
     | 
| 
      
 73 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 74 
     | 
    
         
            +
                    version: 1.3.1
         
     | 
| 
      
 75 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency
         
     | 
| 
      
 76 
     | 
    
         
            +
              name: httpclient
         
     | 
| 
      
 77 
     | 
    
         
            +
              requirement: !ruby/object:Gem::Requirement
         
     | 
| 
      
 78 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 79 
     | 
    
         
            +
                - - ">="
         
     | 
| 
      
 80 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 81 
     | 
    
         
            +
                    version: '0'
         
     | 
| 
      
 82 
     | 
    
         
            +
              type: :runtime
         
     | 
| 
      
 83 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 84 
     | 
    
         
            +
              version_requirements: !ruby/object:Gem::Requirement
         
     | 
| 
      
 85 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 86 
     | 
    
         
            +
                - - ">="
         
     | 
| 
      
 87 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 88 
     | 
    
         
            +
                    version: '0'
         
     | 
| 
      
 89 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency
         
     | 
| 
      
 90 
     | 
    
         
            +
              name: hashie
         
     | 
| 
       57 
91 
     | 
    
         
             
              requirement: !ruby/object:Gem::Requirement
         
     | 
| 
       58 
92 
     | 
    
         
             
                requirements:
         
     | 
| 
       59 
93 
     | 
    
         
             
                - - ">="
         
     | 
| 
         @@ -108,8 +142,22 @@ dependencies: 
     | 
|
| 
       108 
142 
     | 
    
         
             
                - - ">="
         
     | 
| 
       109 
143 
     | 
    
         
             
                  - !ruby/object:Gem::Version
         
     | 
| 
       110 
144 
     | 
    
         
             
                    version: '0'
         
     | 
| 
      
 145 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency
         
     | 
| 
      
 146 
     | 
    
         
            +
              name: benchmark-ips
         
     | 
| 
      
 147 
     | 
    
         
            +
              requirement: !ruby/object:Gem::Requirement
         
     | 
| 
      
 148 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 149 
     | 
    
         
            +
                - - "~>"
         
     | 
| 
      
 150 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 151 
     | 
    
         
            +
                    version: '2.0'
         
     | 
| 
      
 152 
     | 
    
         
            +
              type: :development
         
     | 
| 
      
 153 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 154 
     | 
    
         
            +
              version_requirements: !ruby/object:Gem::Requirement
         
     | 
| 
      
 155 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 156 
     | 
    
         
            +
                - - "~>"
         
     | 
| 
      
 157 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 158 
     | 
    
         
            +
                    version: '2.0'
         
     | 
| 
       111 
159 
     | 
    
         
             
            description: Property tagger for hotels in Dutch and English.
         
     | 
| 
       112 
     | 
    
         
            -
            email: 
     | 
| 
      
 160 
     | 
    
         
            +
            email:
         
     | 
| 
       113 
161 
     | 
    
         
             
            executables:
         
     | 
| 
       114 
162 
     | 
    
         
             
            - property-tagger
         
     | 
| 
       115 
163 
     | 
    
         
             
            - property-tagger-daemon
         
     | 
| 
         @@ -125,9 +173,11 @@ files: 
     | 
|
| 
       125 
173 
     | 
    
         
             
            - config.ru
         
     | 
| 
       126 
174 
     | 
    
         
             
            - exec/property-tagger.rb
         
     | 
| 
       127 
175 
     | 
    
         
             
            - lib/opener/property_tagger.rb
         
     | 
| 
      
 176 
     | 
    
         
            +
            - lib/opener/property_tagger/aspects_cache.rb
         
     | 
| 
       128 
177 
     | 
    
         
             
            - lib/opener/property_tagger/cli.rb
         
     | 
| 
       129 
178 
     | 
    
         
             
            - lib/opener/property_tagger/processor.rb
         
     | 
| 
       130 
179 
     | 
    
         
             
            - lib/opener/property_tagger/public/markdown.css
         
     | 
| 
      
 180 
     | 
    
         
            +
            - lib/opener/property_tagger/remote_aspects_cache.rb
         
     | 
| 
       131 
181 
     | 
    
         
             
            - lib/opener/property_tagger/server.rb
         
     | 
| 
       132 
182 
     | 
    
         
             
            - lib/opener/property_tagger/version.rb
         
     | 
| 
       133 
183 
     | 
    
         
             
            - lib/opener/property_tagger/views/index.erb
         
     | 
| 
         @@ -139,7 +189,7 @@ homepage: http://opener-project.github.com/ 
     | 
|
| 
       139 
189 
     | 
    
         
             
            licenses:
         
     | 
| 
       140 
190 
     | 
    
         
             
            - Apache 2.0
         
     | 
| 
       141 
191 
     | 
    
         
             
            metadata: {}
         
     | 
| 
       142 
     | 
    
         
            -
            post_install_message: 
     | 
| 
      
 192 
     | 
    
         
            +
            post_install_message:
         
     | 
| 
       143 
193 
     | 
    
         
             
            rdoc_options: []
         
     | 
| 
       144 
194 
     | 
    
         
             
            require_paths:
         
     | 
| 
       145 
195 
     | 
    
         
             
            - lib
         
     | 
| 
         @@ -154,10 +204,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement 
     | 
|
| 
       154 
204 
     | 
    
         
             
                - !ruby/object:Gem::Version
         
     | 
| 
       155 
205 
     | 
    
         
             
                  version: '0'
         
     | 
| 
       156 
206 
     | 
    
         
             
            requirements: []
         
     | 
| 
       157 
     | 
    
         
            -
            rubyforge_project: 
     | 
| 
       158 
     | 
    
         
            -
            rubygems_version: 2. 
     | 
| 
       159 
     | 
    
         
            -
            signing_key: 
     | 
| 
      
 207 
     | 
    
         
            +
            rubyforge_project:
         
     | 
| 
      
 208 
     | 
    
         
            +
            rubygems_version: 2.7.8
         
     | 
| 
      
 209 
     | 
    
         
            +
            signing_key:
         
     | 
| 
       160 
210 
     | 
    
         
             
            specification_version: 4
         
     | 
| 
       161 
211 
     | 
    
         
             
            summary: Property tagger for hotels in Dutch and English.
         
     | 
| 
       162 
212 
     | 
    
         
             
            test_files: []
         
     | 
| 
       163 
     | 
    
         
            -
            has_rdoc: 
         
     |