RubyGems - browser_web_data_entity_sumarization - Versions diffs - 1.0.0beta1 - Mend

browser_web_data_entity_sumarization 1.0.0beta1

Files changed (18) hide show

checksums.yaml +7 -0
data/lib/browser_web_data_entity_sumarization.rb +23 -0
data/lib/browser_web_data_entity_sumarization/entity_sumarization_nif_parser.rb +59 -0
data/lib/browser_web_data_entity_sumarization/entity_sumarization_predicate.rb +33 -0
data/lib/browser_web_data_entity_sumarization/entity_sumarization_predicates_similarity.rb +263 -0
data/lib/browser_web_data_entity_sumarization/entity_sumarization_statistics.rb +609 -0
data/lib/browser_web_data_entity_sumarization/sparql_request.rb +74 -0
data/lib/browser_web_data_entity_sumarization/version.rb +3 -0
data/lib/config/entity_sumarization_config.rb +48 -0
data/lib/knowledge/classes_hierarchy.json +906 -0
data/lib/knowledge/common_properties.json +23 -0
data/lib/knowledge/entity_classes.json +1 -0
data/lib/knowledge/knowledge_base.json +40642 -0
data/lib/utils/cache_helper.rb +69 -0
data/lib/utils/hash_helper.rb +79 -0
data/lib/utils/sparql_queries.rb +126 -0
data/lib/utils/string_helper.rb +31 -0
metadata +74 -0

data/lib/utils/cache_helper.rb ADDED Viewed

@@ -0,0 +1,69 @@
+#encoding: utf-8
+# Module CacheHelper
module CacheHelper
  ###
  # Deletes every cached file with the given extension from the application
  # cache directory ("#{Dir.tmpdir}/#{BrowserWebData::TMP_DIR}").
  #
  # @param [String] type File extension (including the leading dot) of the files to delete.
  #
  # @return [Array<String>] paths that matched the cache pattern
  def self.clear_cache(type = '.json')
    pattern = "#{Dir.tmpdir}/#{BrowserWebData::TMP_DIR}/*#{type}"
    Dir.glob(pattern).each { |path| FileUtils.rm_f(path) }
  end

  ##
  # Loads a JSON document cached on disk under +key+, or (re)builds it from the
  # given block and stores it when the cache entry is missing, expired or a
  # reload is demanded.
  #
  # The cache file lives in "#{Dir.tmpdir}/#{BrowserWebData::TMP_DIR}" and is named
  # after StringHelper.get_clear_file_path(key) with a ".json" extension.
  #
  # @param [String] key Cache key; it is sanitized before being used as the file name.
  # @param [Hash] params Load parameters
  # @option params [Hash] :json Options passed to JSON.parse. Default is {symbolize_names: true}.
  # @option params [Numeric, nil] :ttl Time to live in seconds, measured from the ctime of the
  #   cache file. Default is 0, which means any existing entry is treated as expired;
  #   pass nil to disable expiration.
  # @option params [Boolean] :demanded_reload Force the block to be evaluated again. Default is false.
  # @option params [Boolean] :update Accepted for compatibility; not read by this method.
  #
  # @return [Hash] value with recursively symbolized keys; an empty hash when the
  #   entry must be rebuilt but no block was given
  #
  # @yield [Hash] receives an empty hash; the return value should be a Hash and is
  #   written to the cache unless it is empty
  def self.load_cached(key, params = {}, &block)
    default_load_attrs = {
      update: false,
      json: { symbolize_names: true },
      ttl: 0,
      demanded_reload: false
    }
    params = default_load_attrs.merge(params)
    hash = {}

    cache_dir_path = "#{Dir.tmpdir}/#{BrowserWebData::TMP_DIR}"
    # mkdir_p also creates missing parents and is a no-op when the directory exists.
    FileUtils.mkdir_p(cache_dir_path)
    cache_file_path = "#{cache_dir_path}/#{StringHelper.get_clear_file_path(key)}.json"

    # File.exist? is used because File.exists? was removed in Ruby 3.2.
    stale = params[:demanded_reload] ||
            !File.exist?(cache_file_path) ||
            (params[:ttl] && Time.now - File.ctime(cache_file_path) > params[:ttl])

    if stale
      if block_given?
        # A block returning nil is treated as "nothing to cache" instead of crashing.
        hash = yield(hash) || {}
        File.open(cache_file_path, 'w') { |f| f.puts hash.to_json } unless hash.empty?
      end
    else
      hash = JSON.parse(File.read(cache_file_path).force_encoding('UTF-8'), params[:json])
    end

    HashHelper.recursive_symbolize_keys(hash)
  end

  ###
  # Loads a bundled knowledge JSON file from the "knowledge" directory that sits
  # next to the directory containing this file.
  #
  # @param [String] key Name of the knowledge file without extension; it is sanitized
  #   with StringHelper.get_clear_file_path before use.
  #
  # @return [Hash, Array] parsed JSON with symbolized names
  def self.load_knowledge(key)
    dir_path = File.expand_path('../../knowledge', __FILE__)
    file_path = "#{dir_path}/#{StringHelper.get_clear_file_path(key)}.json"
    JSON.parse(File.read(file_path), symbolize_names: true)
  end
end

data/lib/utils/hash_helper.rb ADDED Viewed

@@ -0,0 +1,79 @@
+#encoding: utf-8
+# Module HashHelper
module HashHelper
  ##
  # Returns a new hash whose entries are sorted.
  #
  # Entries are compared as [key, value] pairs, so ordering is driven by the keys.
  #
  # @param [Hash, nil] hash Input hash which will be sorted; nil is treated as an empty hash.
  # @param [Symbol, String] type Sort direction, case-insensitive. One of [:asc, :desc].
  #   Any other value returns the hash unsorted.
  #
  # @return [Hash] sorted_hash
  def self.get_sorted(hash, type = :asc)
    hash ||= {}
    case type.to_s.downcase.to_sym
    when :asc
      Hash[hash.sort]
    when :desc
      # Operands are swapped so that the result really is descending.
      Hash[hash.sort { |a, b| b <=> a }]
    else
      hash
    end
  end

  ##
  # Recursively converts hash keys to symbols.
  #
  # Keys that do not respond to +to_sym+ are kept as they are. Non-hash
  # enumerables are mapped element by element (and therefore returned as arrays).
  #
  # @param [Hash, Enumerable, Object] input_value Data to be symbolized.
  # @return [Hash, Array, Object] Data with symbolized keys.
  def self.recursive_symbolize_keys(input_value)
    case input_value
    when Hash
      Hash[
        input_value.map do |key, value|
          [key.respond_to?(:to_sym) ? key.to_sym : key, recursive_symbolize_keys(value)]
        end
      ]
    when Enumerable
      input_value.map { |value| recursive_symbolize_keys(value) }
    else
      input_value
    end
  end

  ##
  # Recursively converts hash keys to strings.
  #
  # Non-hash enumerables are mapped element by element (and therefore returned
  # as arrays).
  #
  # @param [Hash, Enumerable, Object] input_value Data to be unsymbolized.
  # @return [Hash, Array, Object] Data with string keys.
  def self.recursive_unsymbolize_keys(input_value)
    case input_value
    when Hash
      Hash[
        input_value.map do |key, value|
          [key.respond_to?(:to_s) ? key.to_s : key, recursive_unsymbolize_keys(value)]
        end
      ]
    when Enumerable
      input_value.map { |value| recursive_unsymbolize_keys(value) }
    else
      input_value
    end
  end

  ##
  # Collects the keys of a nested hash, depth first: each key is followed by the
  # keys of its non-empty hash value.
  #
  # @param [Hash] data Nested hash.
  # @return [Array] flat list of all keys; an empty array for an empty hash.
  def self.recursive_map_keys(data)
    data.flat_map do |key, value|
      nested = value.is_a?(Hash) && !value.empty? ? recursive_map_keys(value) : []
      [key] + nested
    end
  end
end

data/lib/utils/sparql_queries.rb ADDED Viewed

@@ -0,0 +1,126 @@
+# encoding: utf-8
##
# Builders of SPARQL query strings used against DBpedia.
#
# Every builder accepts either a full IRI (any value containing "http", which is
# wrapped in angle brackets) or a short local name (which is expanded with a
# default prefix). Including the module also extends the including class, so the
# builders are available as both instance and class methods.
module SPARQLQueries
  ##
  # Query for entities of a type ordered by their DBpedia PageRank, highest first.
  #
  # @param [String] entity_type Class IRI or dbo: local name.
  # @param [Integer] limit Maximum number of rows.
  # @return [String] SPARQL query selecting ?entity and ?rank
  def resources_by_dbpedia_page_rank(entity_type, limit = 10)
    entity_type = sparql_term(entity_type)
    " PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>
      PREFIX dbo:<http://dbpedia.org/ontology/>
      PREFIX vrank:<http://purl.org/voc/vrank#>
      SELECT ?entity ?rank
      FROM <http://dbpedia.org>
      FROM <http://people.aifb.kit.edu/ath/#DBpedia_PageRank>
      WHERE {
        ?entity rdf:type #{entity_type}.
        ?entity vrank:hasRank/vrank:rankValue ?rank.
      }
      ORDER BY DESC(?rank) LIMIT #{limit}"
  end

  ##
  # Query for all distinct predicates that point to the given object.
  #
  # @param [String] object Object IRI or dbo: local name.
  # @return [String] SPARQL query selecting ?property
  def all_predicates_by_object(object)
    object = sparql_term(object)
    " PREFIX dbo:	<http://dbpedia.org/ontology/>
      PREFIX dbp:	<http://dbpedia.org/property/>
      SELECT DISTINCT ?property
      WHERE {
        ?subject ?property #{object}.
      }"
  end

  ##
  # Query for all distinct predicates used by the given subject.
  #
  # @param [String] subject Subject IRI or dbo: local name.
  # @param [Boolean] only_literal When truthy, only predicates with literal objects are selected.
  # @return [String] SPARQL query selecting ?property
  def all_predicates_by_subject(subject, only_literal)
    subject = sparql_term(subject)
    filter = only_literal ? 'FILTER(isLiteral(?object))' : nil
    " PREFIX dbo:	<http://dbpedia.org/ontology/>
      PREFIX dbp:	<http://dbpedia.org/property/>
      SELECT DISTINCT ?property
      WHERE {
        #{subject} ?property ?object.
        #{filter}
      }"
  end

  ##
  # Query for all distinct predicates linking the given subject to the given object.
  #
  # @param [String] subject Subject IRI or dbo: local name.
  # @param [String] object Object IRI or dbo: local name.
  # @return [String] SPARQL query selecting ?property
  def all_predicates_by_object_and_subject(subject, object)
    subject = sparql_term(subject)
    object = sparql_term(object)
    " PREFIX dbo:	<http://dbpedia.org/ontology/>
      PREFIX dbp:	<http://dbpedia.org/property/>
      SELECT DISTINCT ?property
      WHERE {
        #{subject} ?property #{object}.
      }"
  end

  ##
  # Query counting instances of a class that appear as subject or object of a predicate.
  #
  # @param [String] entity_class Class IRI or dbo: local name.
  # @param [String] predicate Predicate IRI or dbo: local name.
  # @return [String] SPARQL query selecting ?count
  def count_predicate_by_entity(entity_class, predicate)
    entity_class = sparql_term(entity_class)
    predicate = sparql_term(predicate)
    " PREFIX dbo:	<http://dbpedia.org/ontology/>
      PREFIX dbp:	<http://dbpedia.org/property/>
      SELECT DISTINCT COUNT(?subject) as ?count
      WHERE {
        ?subject a #{entity_class} .
        {?subject #{predicate} ?a .} UNION {?b #{predicate} ?subject .}
      }
      ORDER BY DESC(?count)"
  end

  ##
  # Query counting distinct subjects that have the same object for every given predicate.
  #
  # @param [Array<String>, String] predicates Predicate IRIs or dbo: local names;
  #   a single value is wrapped into an array.
  # @return [String] SPARQL query selecting ?count
  def count_of_identical_predicates(predicates)
    predicates = [predicates] unless predicates.is_a?(Array)
    where_part = predicates.map { |predicate|
      "?subject #{sparql_term(predicate)} ?object ."
    }.join("\n")
    " SELECT COUNT(DISTINCT ?subject) AS ?count
      WHERE{#{where_part}
     }"
  end

  ##
  # Query for the predicates and values (in both directions) of a resource,
  # together with their labels in the requested language.
  #
  # @param [String] resource Resource IRI or DBpedia resource local name.
  # @param [String] lang Language tag used to filter labels.
  # @return [String] SPARQL query selecting ?predicate, ?predicate_label, ?value, ?value_label
  def resource_properties(resource, lang = 'en')
    resource = sparql_resource(resource)
    # rdfs: is declared explicitly so the query does not rely on endpoint-predefined prefixes.
    " PREFIX rdfs:	<http://www.w3.org/2000/01/rdf-schema#>
      PREFIX dbo:	<http://dbpedia.org/ontology/>
      PREFIX dbp:	<http://dbpedia.org/property/>
      SELECT DISTINCT ?predicate, ?predicate_label, ?value, ?value_label
      WHERE {
        { #{resource} ?predicate ?value . } UNION { ?value ?predicate #{resource} . }
        OPTIONAL{
          ?value rdfs:label ?value_label .
          FILTER (lang(?value_label) = '#{lang}')
        }
        ?predicate rdfs:label ?predicate_label .
        FILTER (lang(?predicate_label) = '#{lang}')
      }"
  end

  ##
  # Query for the owl:Class types of a resource.
  #
  # @param [String] resource Resource IRI or DBpedia resource local name.
  # @return [String] SPARQL query selecting ?entity_class
  def entity_classes(resource)
    resource = sparql_resource(resource)
    # owl: is declared explicitly so the query does not rely on endpoint-predefined prefixes.
    " PREFIX owl:	<http://www.w3.org/2002/07/owl#>
      SELECT DISTINCT ?entity_class
      WHERE {
        #{resource} a ?entity_class .
        ?entity_class a owl:Class .
      }"
  end

  # Makes the query builders available as class methods of the including class too.
  def self.included(base)
    base.extend SPARQLQueries
  end

  private

  # Renders a value as a SPARQL term: a bracketed IRI when it contains "http",
  # otherwise a dbo: prefixed name.
  def sparql_term(value)
    value['http'] ? "<#{value}>" : "dbo:#{value}"
  end

  # Renders a value as a bracketed IRI; short names are resolved against the
  # DBpedia resource namespace.
  def sparql_resource(value)
    value['http'] ? "<#{value}>" : "<http://dbpedia.org/resource/#{value}>"
  end
end

data/lib/utils/string_helper.rb ADDED Viewed

@@ -0,0 +1,31 @@
+#encoding: utf-8
+# Module StringHelper
module StringHelper
  ##
  # Replaces characters that are problematic in file names (":", "/", ".", "*", "#")
  # with underscores so the value can be used as part of a file path.
  #
  # @param [String, #to_s] path Value to sanitize; it is converted with +to_s+ first.
  #
  # @return [String] sanitized path fragment
  def self.get_clear_file_path(path)
    # tr does a plain character-for-character replacement, no regex needed.
    path.to_s.tr(':/.*#', '_')
  end

  ##
  # Converts a camel case string to snake case. Namespace separators ("::")
  # become "/" and dashes become underscores.
  #
  # @param [String, #to_s] string Value to convert; it is converted with +to_s+ first.
  #
  # @return [String] snake_cased_string
  def self.get_snake_case(string)
    string.to_s
          .gsub('::', '/')
          .gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2') # split an acronym from a following word
          .gsub(/([a-z\d])([A-Z])/, '\1_\2')     # split a lower case letter/digit from an upper case one
          .tr('-', '_')
          .downcase
  end
end

metadata ADDED Viewed

@@ -0,0 +1,74 @@
+--- !ruby/object:Gem::Specification
+name: browser_web_data_entity_sumarization
+version: !ruby/object:Gem::Version
+  version: 1.0.0beta1
+platform: ruby
+authors:
+- Marek Filteš
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2017-04-12 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - '='
+      - !ruby/object:Gem::Version
+        version: 2.1.0
+  name: sparql-client
+  prerelease: false
+  type: :runtime
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - '='
+      - !ruby/object:Gem::Version
+        version: 2.1.0
+description:
+email: marek.filtes@gmail.com
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- lib/browser_web_data_entity_sumarization.rb
+- lib/browser_web_data_entity_sumarization/entity_sumarization_nif_parser.rb
+- lib/browser_web_data_entity_sumarization/entity_sumarization_predicate.rb
+- lib/browser_web_data_entity_sumarization/entity_sumarization_predicates_similarity.rb
+- lib/browser_web_data_entity_sumarization/entity_sumarization_statistics.rb
+- lib/browser_web_data_entity_sumarization/sparql_request.rb
+- lib/browser_web_data_entity_sumarization/version.rb
+- lib/config/entity_sumarization_config.rb
+- lib/knowledge/classes_hierarchy.json
+- lib/knowledge/common_properties.json
+- lib/knowledge/entity_classes.json
+- lib/knowledge/knowledge_base.json
+- lib/utils/cache_helper.rb
+- lib/utils/hash_helper.rb
+- lib/utils/sparql_queries.rb
+- lib/utils/string_helper.rb
+homepage:
+licenses:
+- MIT
+metadata: {}
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+- results
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">"
+    - !ruby/object:Gem::Version
+      version: 1.3.1
+requirements: []
+rubyforge_project:
+rubygems_version: 2.4.8
+signing_key:
+specification_version: 4
+summary: Tool for entity sumarization.
+test_files: []