browser_web_data_entity_sumarization 1.0.0beta1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,69 @@
1
+ #encoding: utf-8
2
+
3
+ # Module CacheHelper
4
module CacheHelper

  ###
  # Removes cached files of the given type from the application cache
  # directory ("#{Dir.tmpdir}/#{BrowserWebData::TMP_DIR}").
  #
  # @param [String] type File extension of the files to remove. Default is '.json'.
  #
  # @return [Array<String>] paths matched by the glob pattern
  def self.clear_cache(type = '.json')
    pattern = "#{Dir.tmpdir}/#{BrowserWebData::TMP_DIR}/*#{type}"
    Dir.glob(pattern).each { |path| FileUtils.rm_f(path) }
  end

  ##
  # Loads a value through a JSON file cache stored in the temporary directory.
  # The cached file is reused unless it does not exist, :demanded_reload is set,
  # or a positive :ttl has elapsed since the file was last written. On reload the
  # given block is called and its non-empty result is written to the cache file.
  #
  # @param [String] key Cache key. It is converted to a file name with StringHelper.get_clear_file_path.
  # @param [Hash] params Load parameters
  # @option params [Hash] :json Optional parameters. Json parse attributes. Default is {symbolize_names:true}.
  # @option params [Numeric] :ttl Optional parameters. Time to live in seconds. Default is 0;
  #   0 or nil means the cached file never expires.
  # @option params [Boolean] :demanded_reload Optional parameters. Flag to force reload through the block. Default is false.
  #
  # @return [Hash] hash_value with recursively symbolized keys. It is empty when a reload
  #   is needed but no block is given, or when the block returns nil.
  #
  # @yield [Hash] an empty hash; the return value must be Hash
  def self.load_cached(key, params = {}, &block)
    default_load_attrs = {
      update: false,
      json: {symbolize_names: true},
      ttl: 0,
      demanded_reload: false
    }
    params = default_load_attrs.merge(params || {})
    hash = {}

    cache_dir_path = "#{Dir.tmpdir}/#{BrowserWebData::TMP_DIR}"
    # mkdir_p does not raise when the directory already exists, so there is no
    # window between an existence check and the creation.
    FileUtils.mkdir_p(cache_dir_path)
    cache_file_path = "#{cache_dir_path}/#{StringHelper.get_clear_file_path(key)}.json"

    if params[:demanded_reload] || cache_stale?(cache_file_path, params[:ttl])
      if block_given?
        # A block returning nil is treated as "nothing to cache".
        hash = yield(hash) || {}
        File.open(cache_file_path, 'w') { |f| f.puts hash.to_json } unless hash.empty?
      end
    else
      hash = JSON.parse(File.read(cache_file_path).force_encoding('UTF-8'), params[:json])
    end

    HashHelper.recursive_symbolize_keys(hash)
  end

  ###
  # Loads built-in knowledge by key from the "knowledge" directory that is
  # a sibling of this file's directory.
  #
  # @param [String] key Name of the knowledge file without the .json extension.
  #
  # @return [Hash, Array] parsed JSON with symbolized names
  def self.load_knowledge(key)
    # '../..' from this file is the parent of the directory containing it.
    dir_path = File.expand_path('../../knowledge', __FILE__)
    file_path = "#{dir_path}/#{StringHelper.get_clear_file_path(key)}.json"

    JSON.parse(File.read(file_path), symbolize_names: true)
  end

  # Tells whether the cache file must be (re)built: it is missing, or a positive
  # ttl has elapsed since its last modification.
  #
  # @param [String] cache_file_path
  # @param [Numeric, nil] ttl Time to live in seconds; 0 or nil disables expiration.
  #
  # @return [Boolean]
  def self.cache_stale?(cache_file_path, ttl)
    return true unless File.exist?(cache_file_path)

    # 0 is truthy in Ruby, so the ttl has to be compared explicitly.
    ttl.to_f > 0 && Time.now - File.mtime(cache_file_path) > ttl.to_f
  end
  private_class_method :cache_stale?

end
@@ -0,0 +1,79 @@
1
+ #encoding: utf-8
2
+
3
+
4
+ # Module HashHelper
5
module HashHelper

  ##
  # Returns a new hash with the entries of the input hash sorted by key.
  #
  # @param [Hash, nil] hash Input hash which will be sorted. nil is treated as an empty hash.
  # @param [Symbol, String] type Type of sorting, case-insensitive. One of [:asc, :desc],
  #   default is :asc. Any other value returns the input hash unsorted.
  #
  # @return [Hash] sorted_hash
  def self.get_sorted(hash, type = :asc)
    hash ||= {}
    case type.to_s.downcase.to_sym
    when :asc
      Hash[hash.sort]
    when :desc
      # Operands are swapped so that the result is descending.
      Hash[hash.sort { |a, b| b <=> a }]
    else
      hash
    end
  end

  ##
  # Recursively converts hash keys to symbols.
  # Keys that do not respond to to_sym are kept as they are.
  #
  # @param [Hash, Enumerable] input_value Data to be symbolized.
  # @return [Hash, Enumerable] Symbolized data. Non-hash enumerables are returned as arrays,
  #   any other value is returned unchanged.
  def self.recursive_symbolize_keys(input_value)
    case input_value
    when Hash
      Hash[
        input_value.map do |k, v|
          [k.respond_to?(:to_sym) ? k.to_sym : k, recursive_symbolize_keys(v)]
        end
      ]
    when Enumerable
      input_value.map { |v| recursive_symbolize_keys(v) }
    else
      input_value
    end
  end

  ##
  # Recursively converts hash keys to strings (via to_s).
  #
  # @param [Hash, Enumerable] input_value Data to be unsymbolized.
  # @return [Hash, Enumerable] Unsymbolized data. Non-hash enumerables are returned as arrays,
  #   any other value is returned unchanged.
  def self.recursive_unsymbolize_keys(input_value)
    case input_value
    when Hash
      Hash[
        input_value.map do |k, v|
          [k.respond_to?(:to_s) ? k.to_s : k, recursive_unsymbolize_keys(v)]
        end
      ]
    when Enumerable
      input_value.map { |v| recursive_unsymbolize_keys(v) }
    else
      input_value
    end
  end

  ##
  # Collects the keys of a nested hash in depth-first order: every key is
  # followed by the keys found in its value when that value is a non-empty Hash.
  #
  # @param [Hash] data Nested hash.
  # @return [Array, nil] Flat list of keys, or nil when data is empty.
  def self.recursive_map_keys(data)
    data.map { |k, v|
      nested_keys = v.is_a?(Hash) && !v.empty? ? recursive_map_keys(v) : []

      [k] + nested_keys
    }.reduce(:+)
  end

end
@@ -0,0 +1,126 @@
1
+ # encoding: utf-8
2
+
3
# Builders of SPARQL query strings. The methods are available both as
# instance methods and as class methods of every class that includes the module.
#
# Arguments are interpolated into the query text without escaping, so they
# must not come from untrusted input.
module SPARQLQueries

  # Query selecting entities of the given type with their page rank,
  # ordered from the highest rank.
  #
  # @param [String] entity_type Full IRI (contains 'http') or a name in the dbo: namespace.
  # @param [Integer] limit Maximum number of rows. Default is 10.
  #
  # @return [String] SPARQL query
  def resources_by_dbpedia_page_rank(entity_type, limit = 10)
    entity_type = sparql_ontology_term(entity_type)

    " PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>
      PREFIX dbo:<http://dbpedia.org/ontology/>
      PREFIX vrank:<http://purl.org/voc/vrank#>

      SELECT ?entity ?rank
      FROM <http://dbpedia.org>
      FROM <http://people.aifb.kit.edu/ath/#DBpedia_PageRank>
      WHERE {
        ?entity rdf:type #{entity_type}.
        ?entity vrank:hasRank/vrank:rankValue ?rank.
      }
      ORDER BY DESC(?rank) LIMIT #{limit}"
  end

  # Query selecting distinct predicates that point to the given object.
  #
  # @param [String] object Full IRI (contains 'http') or a name in the dbo: namespace.
  #
  # @return [String] SPARQL query
  def all_predicates_by_object(object)
    object = sparql_ontology_term(object)

    " PREFIX dbo: <http://dbpedia.org/ontology/>
      PREFIX dbp: <http://dbpedia.org/property/>

      SELECT DISTINCT ?property

      WHERE {
        ?subject ?property #{object}.
      }"
  end

  # Query selecting distinct predicates of the given subject.
  #
  # @param [String] subject Full IRI (contains 'http') or a name in the dbo: namespace.
  # @param [Boolean] only_literal When truthy, only predicates with literal objects are selected.
  #
  # @return [String] SPARQL query
  def all_predicates_by_subject(subject, only_literal)
    subject = sparql_ontology_term(subject)
    # nil interpolates as an empty string, i.e. no filter line.
    filter = only_literal ? 'FILTER(isLiteral(?object))' : nil

    " PREFIX dbo: <http://dbpedia.org/ontology/>
      PREFIX dbp: <http://dbpedia.org/property/>

      SELECT DISTINCT ?property

      WHERE {
        #{subject} ?property ?object.
        #{filter}
      }"
  end

  # Query selecting distinct predicates that connect the subject with the object.
  #
  # @param [String] subject Full IRI (contains 'http') or a name in the dbo: namespace.
  # @param [String] object Full IRI (contains 'http') or a name in the dbo: namespace.
  #
  # @return [String] SPARQL query
  def all_predicates_by_object_and_subject(subject, object)
    subject = sparql_ontology_term(subject)
    object = sparql_ontology_term(object)

    " PREFIX dbo: <http://dbpedia.org/ontology/>
      PREFIX dbp: <http://dbpedia.org/property/>

      SELECT DISTINCT ?property

      WHERE {
        #{subject} ?property #{object}.
      }"
  end

  # Query counting entities of the given class that use the predicate,
  # either as subject or as object of the statement.
  #
  # @param [String] entity_class Full IRI (contains 'http') or a name in the dbo: namespace.
  # @param [String] predicate Full IRI (contains 'http') or a name in the dbo: namespace.
  #
  # @return [String] SPARQL query
  def count_predicate_by_entity(entity_class, predicate)
    entity_class = sparql_ontology_term(entity_class)
    predicate = sparql_ontology_term(predicate)

    " PREFIX dbo: <http://dbpedia.org/ontology/>
      PREFIX dbp: <http://dbpedia.org/property/>

      SELECT DISTINCT COUNT(?subject) as ?count

      WHERE {
        ?subject a #{entity_class} .
        {?subject #{predicate} ?a .} UNION {?b #{predicate} ?subject .}
      }

      ORDER BY DESC(?count)"
  end

  # Query counting distinct subjects that have the same object for all given predicates.
  #
  # @param [Array<String>, String] predicates One predicate or a list of them, each a full IRI
  #   (contains 'http') or a name in the dbo: namespace.
  #
  # @return [String] SPARQL query
  def count_of_identical_predicates(predicates)
    where_part = Array(predicates).map { |predicate|
      "?subject #{sparql_ontology_term(predicate)} ?object ."
    }.join("\n")

    " SELECT COUNT(DISTINCT ?subject) AS ?count
      WHERE{#{where_part}
      }"
  end

  # Query selecting predicates and values of the resource (in both directions)
  # together with their labels in the given language.
  # NOTE(review): the rdfs: prefix is used without a PREFIX declaration, so the query
  # presumably relies on prefixes predefined by the endpoint - confirm.
  #
  # @param [String] resource Full IRI (contains 'http') or a DBpedia resource name.
  # @param [String] lang Language tag of the labels. Default is 'en'.
  #
  # @return [String] SPARQL query
  def resource_properties(resource, lang = 'en')
    resource = sparql_resource_term(resource)

    " PREFIX dbo: <http://dbpedia.org/ontology/>
      PREFIX dbp: <http://dbpedia.org/property/>
      SELECT DISTINCT ?predicate, ?predicate_label, ?value, ?value_label
      WHERE {
        { #{resource} ?predicate ?value . } UNION { ?value ?predicate #{resource} . }

        OPTIONAL{
          ?value rdfs:label ?value_label .
          FILTER (lang(?value_label) = '#{lang}')
        }

        ?predicate rdfs:label ?predicate_label .
        FILTER (lang(?predicate_label) = '#{lang}')
      }"
  end

  # Query selecting classes (owl:Class) of the resource.
  # NOTE(review): the owl: prefix is used without a PREFIX declaration, so the query
  # presumably relies on prefixes predefined by the endpoint - confirm.
  #
  # @param [String] resource Full IRI (contains 'http') or a DBpedia resource name.
  #
  # @return [String] SPARQL query
  def entity_classes(resource)
    resource = sparql_resource_term(resource)

    " SELECT DISTINCT ?entity_class
      WHERE {
        #{resource} a ?entity_class .
        ?entity_class a owl:Class .
      }"
  end

  # Makes the query builders available also as class methods of the including class.
  def self.included(base)
    base.extend SPARQLQueries
  end

  private

  # Wraps a full IRI (a value containing 'http') in angle brackets,
  # any other value is put into the dbo: namespace.
  def sparql_ontology_term(value)
    value['http'] ? "<#{value}>" : "dbo:#{value}"
  end

  # Wraps a full IRI (a value containing 'http') in angle brackets,
  # any other value is expanded to a DBpedia resource IRI.
  def sparql_resource_term(value)
    value['http'] ? "<#{value}>" : "<http://dbpedia.org/resource/#{value}>"
  end

end
@@ -0,0 +1,31 @@
1
+ #encoding: utf-8
2
+
3
+
4
+ # Module StringHelper
5
module StringHelper

  ##
  # Makes a string usable as a part of a file path: each of the
  # characters : / . * # is replaced by an underscore.
  #
  # @param [String] path Any value, it is converted with to_s first.
  #
  # @return [String] path
  def self.get_clear_file_path(path)
    raw_path = path.to_s
    raw_path.gsub(/[:\/\.\*#]/, '_')
  end

  ##
  # Converts a camel case string to a snake case one. Namespace separators (::)
  # become slashes and dashes become underscores.
  #
  # @param [String] string Any value, it is converted with to_s first.
  #
  # @return [String] snake_cased_string
  def self.get_snake_case(string)
    with_slashes = string.to_s.gsub(/::/, '/')
    # Split an upper-case run from the capitalized word that follows it.
    split_acronyms = with_slashes.gsub(/([A-Z]+)([A-Z][a-z])/,'\1_\2')
    # Split a lower-case letter or digit from the upper-case letter that follows it.
    split_words = split_acronyms.gsub(/([a-z\d])([A-Z])/,'\1_\2')

    split_words.tr('-', '_').downcase
  end

end
metadata ADDED
@@ -0,0 +1,74 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: browser_web_data_entity_sumarization
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0beta1
5
+ platform: ruby
6
+ authors:
7
+ - Marek Filteš
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2017-04-12 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - '='
17
+ - !ruby/object:Gem::Version
18
+ version: 2.1.0
19
+ name: sparql-client
20
+ prerelease: false
21
+ type: :runtime
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '='
25
+ - !ruby/object:Gem::Version
26
+ version: 2.1.0
27
+ description:
28
+ email: marek.filtes@gmail.com
29
+ executables: []
30
+ extensions: []
31
+ extra_rdoc_files: []
32
+ files:
33
+ - lib/browser_web_data_entity_sumarization.rb
34
+ - lib/browser_web_data_entity_sumarization/entity_sumarization_nif_parser.rb
35
+ - lib/browser_web_data_entity_sumarization/entity_sumarization_predicate.rb
36
+ - lib/browser_web_data_entity_sumarization/entity_sumarization_predicates_similarity.rb
37
+ - lib/browser_web_data_entity_sumarization/entity_sumarization_statistics.rb
38
+ - lib/browser_web_data_entity_sumarization/sparql_request.rb
39
+ - lib/browser_web_data_entity_sumarization/version.rb
40
+ - lib/config/entity_sumarization_config.rb
41
+ - lib/knowledge/classes_hierarchy.json
42
+ - lib/knowledge/common_properties.json
43
+ - lib/knowledge/entity_classes.json
44
+ - lib/knowledge/knowledge_base.json
45
+ - lib/utils/cache_helper.rb
46
+ - lib/utils/hash_helper.rb
47
+ - lib/utils/sparql_queries.rb
48
+ - lib/utils/string_helper.rb
49
+ homepage:
50
+ licenses:
51
+ - MIT
52
+ metadata: {}
53
+ post_install_message:
54
+ rdoc_options: []
55
+ require_paths:
56
+ - lib
57
+ - results
58
+ required_ruby_version: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ version: '0'
63
+ required_rubygems_version: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - ">"
66
+ - !ruby/object:Gem::Version
67
+ version: 1.3.1
68
+ requirements: []
69
+ rubyforge_project:
70
+ rubygems_version: 2.4.8
71
+ signing_key:
72
+ specification_version: 4
73
+ summary: Tool for entity sumarization.
74
+ test_files: []