browser_web_data_entity_sumarization 1.0.0beta1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/browser_web_data_entity_sumarization.rb +23 -0
- data/lib/browser_web_data_entity_sumarization/entity_sumarization_nif_parser.rb +59 -0
- data/lib/browser_web_data_entity_sumarization/entity_sumarization_predicate.rb +33 -0
- data/lib/browser_web_data_entity_sumarization/entity_sumarization_predicates_similarity.rb +263 -0
- data/lib/browser_web_data_entity_sumarization/entity_sumarization_statistics.rb +609 -0
- data/lib/browser_web_data_entity_sumarization/sparql_request.rb +74 -0
- data/lib/browser_web_data_entity_sumarization/version.rb +3 -0
- data/lib/config/entity_sumarization_config.rb +48 -0
- data/lib/knowledge/classes_hierarchy.json +906 -0
- data/lib/knowledge/common_properties.json +23 -0
- data/lib/knowledge/entity_classes.json +1 -0
- data/lib/knowledge/knowledge_base.json +40642 -0
- data/lib/utils/cache_helper.rb +69 -0
- data/lib/utils/hash_helper.rb +79 -0
- data/lib/utils/sparql_queries.rb +126 -0
- data/lib/utils/string_helper.rb +31 -0
- metadata +74 -0
@@ -0,0 +1,69 @@
|
|
1
|
+
#encoding: utf-8
|
2
|
+
|
3
|
+
# Module CacheHelper
|
4
|
+
module CacheHelper

  ###
  # Removes cached files of the given type from the application cache
  # directory ("#{Dir.tmpdir}/#{BrowserWebData::TMP_DIR}").
  #
  # @param [String] type File extension (including the dot) of the files to delete.
  #
  # @return [Array<String>] paths matched by the glob (each one was passed to FileUtils.rm_f)
  def self.clear_cache(type = '.json')
    pattern = "#{Dir.tmpdir}/#{BrowserWebData::TMP_DIR}/*#{type}"
    Dir.glob(pattern).each { |path| FileUtils.rm_f(path) }
  end

  ##
  # Loads a value from a JSON cache file, or computes it with the given block
  # and stores it when the cache file is missing, expired or a reload is demanded.
  #
  # The cache file lives in "#{Dir.tmpdir}/#{BrowserWebData::TMP_DIR}" and is named
  # after +key+ (sanitized by StringHelper.get_clear_file_path) with a ".json" suffix.
  #
  # @param [String] key Cache key; used to build the cache file name.
  # @param [Hash] params Load parameters
  # @option params [Hash] :json Optional. Options passed to JSON.parse. Default is {symbolize_names: true}.
  # @option params [Numeric, nil] :ttl Optional. Time to live in seconds, compared with the age of
  #   the cache file (File.ctime). Default is 0. Pass nil to skip the age check entirely.
  # @option params [Boolean] :demanded_reload Optional. Forces recomputation via the block. Default is false.
  #
  # @return [Hash] hash_value with keys processed by HashHelper.recursive_symbolize_keys
  #
  # @yield [Hash] receives an empty hash; the block's return value must be a Hash (nil is treated as empty)
  def self.load_cached(key, params = {}, &block)
    default_load_attrs = {
      update: false, # kept for compatibility; not read by this method
      json: {symbolize_names: true},
      ttl: 0,
      demanded_reload: false
    }
    params = default_load_attrs.merge(params)
    hash = {}

    cache_dir_path = "#{Dir.tmpdir}/#{BrowserWebData::TMP_DIR}"
    # mkdir_p is a no-op when the directory already exists, so there is no
    # check-then-create race between concurrent callers.
    FileUtils.mkdir_p(cache_dir_path)
    cache_file_path = "#{cache_dir_path}/#{StringHelper.get_clear_file_path(key)}.json"

    # NOTE(review): 0 is truthy in Ruby, so with the default ttl of 0 any existing
    # cache file is older than the ttl and the block is re-run on every call.
    # Callers that want a permanent cache have to pass ttl: nil - confirm intent.
    expired = File.exist?(cache_file_path) &&
              params[:ttl] &&
              Time.now - File.ctime(cache_file_path) > params[:ttl]

    if params[:demanded_reload] || !File.exist?(cache_file_path) || expired
      if block_given?
        # A block that yields nothing useful (nil) is handled like an empty result.
        hash = (yield hash) || {}
        # Empty results are not persisted, so the next call tries to compute again.
        File.open(cache_file_path, 'w') { |f| f.puts hash.to_json } unless hash.empty?
      end
    else
      hash = JSON.parse(File.read(cache_file_path).force_encoding('UTF-8'), params[:json])
    end

    HashHelper.recursive_symbolize_keys(hash)
  end

  ###
  # Loads built-in knowledge stored as JSON in the "knowledge" directory that
  # sits next to the directory containing this file.
  #
  # @param [String] key Name of the knowledge file without the ".json" extension.
  #
  # @return [Hash, Array] parsed JSON with symbolized names
  def self.load_knowledge(key)
    dir_path = "#{File.dirname(File.expand_path('..', __FILE__))}/knowledge"
    file_path = "#{dir_path}/#{StringHelper.get_clear_file_path(key)}.json"

    JSON.parse(File.read(file_path), symbolize_names: true)
  end

end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
#encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
# Module HashHelper
|
5
|
+
module HashHelper

  ##
  # Returns a new hash whose entries are ordered by key.
  #
  # @param [Hash, nil] hash Input hash which will be sorted; nil is treated as an empty hash.
  # @param [Symbol, String] type Type of sorting, case-insensitive. One of [:asc, :desc].
  #   Default is :asc. Any other value returns the input hash unsorted.
  #
  # @return [Hash] sorted_hash
  def self.get_sorted(hash, type = :asc)
    hash = {} unless hash
    case type.to_s.downcase.to_sym
    when :asc
      Hash[hash.sort]
    when :desc
      # Swap the operands so the greatest key comes first.
      Hash[hash.sort { |a, b| b <=> a }]
    else
      hash
    end
  end

  ##
  # Recursively converts hash keys to symbols.
  #
  # Keys that do not respond to +to_sym+ are kept as they are. Hashes nested
  # in other hashes or in enumerables are converted too; any other value is
  # returned untouched.
  #
  # @param [Hash, Enumerable] input_value Data to be symbolized.
  # @return [Hash, Enumerable] Symbolized data.
  def self.recursive_symbolize_keys(input_value)
    case input_value
    when Hash
      Hash[
        input_value.map do |k, v|
          [k.respond_to?(:to_sym) ? k.to_sym : k, recursive_symbolize_keys(v)]
        end
      ]
    when Enumerable
      input_value.map { |v| recursive_symbolize_keys(v) }
    else
      input_value
    end
  end

  ##
  # Recursively converts hash keys to strings.
  #
  # Every key that responds to +to_s+ is converted, so non-symbol keys
  # (for example integers) end up as strings as well.
  #
  # @param [Hash, Enumerable] input_value Data to be unsymbolized.
  # @return [Hash, Enumerable] Unsymbolized data.
  def self.recursive_unsymbolize_keys(input_value)
    case input_value
    when Hash
      Hash[
        input_value.map do |k, v|
          [k.respond_to?(:to_s) ? k.to_s : k, recursive_unsymbolize_keys(v)]
        end
      ]
    when Enumerable
      input_value.map { |v| recursive_unsymbolize_keys(v) }
    else
      input_value
    end
  end

  ##
  # Collects all keys of a nested hash into one flat array, depth first:
  # each key is followed by the keys of its non-empty hash value.
  #
  # @param [Hash] data Nested hash.
  #
  # @return [Array, nil] flat list of keys, or nil when +data+ is empty
  def self.recursive_map_keys(data)
    data.map { |k, v|
      # Only non-empty hashes are descended into; anything else adds no keys.
      inner_array = (v.is_a?(Hash) && !v.empty?) ? recursive_map_keys(v) : []

      [k] + inner_array
    }.reduce(:+)
  end

end
|
@@ -0,0 +1,126 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Builders of SPARQL query strings for DBpedia.
#
# Every public method only assembles and returns a query string; nothing is
# sent anywhere from this module. Arguments that contain 'http' are used as
# full IRIs (wrapped in <...>); other values are treated as short names and
# expanded with the dbo: prefix or the DBpedia resource namespace.
#
# Including the module also extends the including class (see .included), so
# the builders are callable on both the class and its instances.
module SPARQLQueries

  # Query for entities of the given type ordered by DBpedia PageRank, highest first.
  #
  # @param [String] entity_type Full IRI or DBpedia ontology class name.
  # @param [Integer] limit Maximum number of rows. Default is 10.
  #
  # @return [String] SPARQL query selecting ?entity and ?rank
  def resources_by_dbpedia_page_rank(entity_type, limit = 10)
    entity_type = sparql_ontology_term(entity_type)

    " PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>
      PREFIX dbo:<http://dbpedia.org/ontology/>
      PREFIX vrank:<http://purl.org/voc/vrank#>

      SELECT ?entity ?rank
      FROM <http://dbpedia.org>
      FROM <http://people.aifb.kit.edu/ath/#DBpedia_PageRank>
      WHERE {
        ?entity rdf:type #{entity_type}.
        ?entity vrank:hasRank/vrank:rankValue ?rank.
      }
      ORDER BY DESC(?rank) LIMIT #{limit}"
  end

  # Query for all distinct predicates that point to the given object.
  #
  # @param [String] object Full IRI or DBpedia ontology name.
  #
  # @return [String] SPARQL query selecting ?property
  def all_predicates_by_object(object)
    object = sparql_ontology_term(object)

    " PREFIX dbo: <http://dbpedia.org/ontology/>
      PREFIX dbp: <http://dbpedia.org/property/>

      SELECT DISTINCT ?property

      WHERE {
        ?subject ?property #{object}.
      }"
  end

  # Query for all distinct predicates that start at the given subject.
  #
  # @param [String] subject Full IRI or DBpedia ontology name.
  # @param [Boolean] only_literal When truthy, only predicates with literal objects are matched.
  #
  # @return [String] SPARQL query selecting ?property
  def all_predicates_by_subject(subject, only_literal)
    subject = sparql_ontology_term(subject)
    # nil interpolates as an empty string, i.e. no filter line.
    filter = only_literal ? 'FILTER(isLiteral(?object))' : nil

    " PREFIX dbo: <http://dbpedia.org/ontology/>
      PREFIX dbp: <http://dbpedia.org/property/>

      SELECT DISTINCT ?property

      WHERE {
        #{subject} ?property ?object.
        #{filter}
      }"
  end

  # Query for all distinct predicates connecting the given subject to the given object.
  #
  # @param [String] subject Full IRI or DBpedia ontology name.
  # @param [String] object Full IRI or DBpedia ontology name.
  #
  # @return [String] SPARQL query selecting ?property
  def all_predicates_by_object_and_subject(subject, object)
    subject = sparql_ontology_term(subject)
    object = sparql_ontology_term(object)

    " PREFIX dbo: <http://dbpedia.org/ontology/>
      PREFIX dbp: <http://dbpedia.org/property/>

      SELECT DISTINCT ?property

      WHERE {
        #{subject} ?property #{object}.
      }"
  end

  # Query counting entities of a class that appear as subject or object of a predicate.
  #
  # @param [String] entity_class Full IRI or DBpedia ontology class name.
  # @param [String] predicate Full IRI or DBpedia ontology property name.
  #
  # @return [String] SPARQL query selecting ?count
  def count_predicate_by_entity(entity_class, predicate)
    entity_class = sparql_ontology_term(entity_class)
    predicate = sparql_ontology_term(predicate)

    " PREFIX dbo: <http://dbpedia.org/ontology/>
      PREFIX dbp: <http://dbpedia.org/property/>

      SELECT DISTINCT COUNT(?subject) as ?count

      WHERE {
        ?subject a #{entity_class} .
        {?subject #{predicate} ?a .} UNION {?b #{predicate} ?subject .}
      }

      ORDER BY DESC(?count)"
  end

  # Query counting distinct subjects for which all given predicates share the same object.
  #
  # @param [Array<String>, String] predicates One predicate or a list of them
  #   (full IRIs or DBpedia ontology property names).
  #
  # @return [String] SPARQL query selecting ?count
  def count_of_identical_predicates(predicates)
    predicates = [predicates] unless predicates.is_a?(Array)
    where_part = predicates.map { |predicate|
      "?subject #{sparql_ontology_term(predicate)} ?object ."
    }.join("\n")

    # dbo: is declared because short predicate names are expanded with it.
    " PREFIX dbo: <http://dbpedia.org/ontology/>

      SELECT COUNT(DISTINCT ?subject) AS ?count
      WHERE{#{where_part}
      }"
  end

  # Query for all statements in which the resource is subject or object,
  # together with predicate labels and (optionally present) value labels.
  #
  # @param [String] resource Full IRI or DBpedia resource name.
  # @param [String] lang Language tag the labels are filtered by. Default is 'en'.
  #
  # @return [String] SPARQL query selecting ?predicate, ?predicate_label, ?value, ?value_label
  def resource_properties(resource, lang = 'en')
    resource = sparql_resource_term(resource)

    " PREFIX dbo: <http://dbpedia.org/ontology/>
      PREFIX dbp: <http://dbpedia.org/property/>
      PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
      SELECT DISTINCT ?predicate, ?predicate_label, ?value, ?value_label
      WHERE {
        { #{resource} ?predicate ?value . } UNION { ?value ?predicate #{resource} . }

        OPTIONAL{
          ?value rdfs:label ?value_label .
          FILTER (lang(?value_label) = '#{lang}')
        }

        ?predicate rdfs:label ?predicate_label .
        FILTER (lang(?predicate_label) = '#{lang}')
      }"
  end

  # Query for all owl:Class types of the given resource.
  #
  # @param [String] resource Full IRI or DBpedia resource name.
  #
  # @return [String] SPARQL query selecting ?entity_class
  def entity_classes(resource)
    resource = sparql_resource_term(resource)

    " PREFIX owl: <http://www.w3.org/2002/07/owl#>
      SELECT DISTINCT ?entity_class
      WHERE {
        #{resource} a ?entity_class .
        ?entity_class a owl:Class .
      }"
  end

  # Hook run on include: also extends the including class with this module.
  def self.included(base)
    base.extend SPARQLQueries
  end

  private

  # Renders a value as a full IRI reference when it contains 'http',
  # otherwise as a prefixed name in the dbo: namespace.
  def sparql_ontology_term(value)
    value['http'] ? "<#{value}>" : "dbo:#{value}"
  end

  # Renders a value as a full IRI reference when it contains 'http',
  # otherwise as an IRI reference in the DBpedia resource namespace.
  def sparql_resource_term(value)
    value['http'] ? "<#{value}>" : "<http://dbpedia.org/resource/#{value}>"
  end

end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
#encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
# Module StringHelper
|
5
|
+
module StringHelper

  ##
  # Makes a value usable as a part of a file path: every ':', '/', '.', '*'
  # and '#' character is replaced by an underscore.
  #
  # @param [String] path Any value; it is converted with to_s first.
  #
  # @return [String] path without the problematic characters
  def self.get_clear_file_path(path)
    raw = path.to_s
    raw.gsub(/[:\/\.\*#]/, '_')
  end

  ##
  # Converts a camel case string into a snake case one. Namespace separators
  # ('::') become '/', and dashes become underscores.
  #
  # @param [String] string Any value; it is converted with to_s first.
  #
  # @return [String] snake_cased_string
  def self.get_snake_case(string)
    # 'A::B' -> 'A/B'
    text = string.to_s.gsub(/::/, '/')
    # split an acronym from the following capitalized word: 'HTTPServer' -> 'HTTP_Server'
    text = text.gsub(/([A-Z]+)([A-Z][a-z])/,'\1_\2')
    # split a lower case letter or digit from the following capital: 'fooBar' -> 'foo_Bar'
    text = text.gsub(/([a-z\d])([A-Z])/,'\1_\2')
    text.tr('-', '_').downcase
  end

end
|
metadata
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: browser_web_data_entity_sumarization
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0beta1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Marek Filteš
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2017-04-12 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
15
|
+
requirements:
|
16
|
+
- - '='
|
17
|
+
- !ruby/object:Gem::Version
|
18
|
+
version: 2.1.0
|
19
|
+
name: sparql-client
|
20
|
+
prerelease: false
|
21
|
+
type: :runtime
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - '='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 2.1.0
|
27
|
+
description:
|
28
|
+
email: marek.filtes@gmail.com
|
29
|
+
executables: []
|
30
|
+
extensions: []
|
31
|
+
extra_rdoc_files: []
|
32
|
+
files:
|
33
|
+
- lib/browser_web_data_entity_sumarization.rb
|
34
|
+
- lib/browser_web_data_entity_sumarization/entity_sumarization_nif_parser.rb
|
35
|
+
- lib/browser_web_data_entity_sumarization/entity_sumarization_predicate.rb
|
36
|
+
- lib/browser_web_data_entity_sumarization/entity_sumarization_predicates_similarity.rb
|
37
|
+
- lib/browser_web_data_entity_sumarization/entity_sumarization_statistics.rb
|
38
|
+
- lib/browser_web_data_entity_sumarization/sparql_request.rb
|
39
|
+
- lib/browser_web_data_entity_sumarization/version.rb
|
40
|
+
- lib/config/entity_sumarization_config.rb
|
41
|
+
- lib/knowledge/classes_hierarchy.json
|
42
|
+
- lib/knowledge/common_properties.json
|
43
|
+
- lib/knowledge/entity_classes.json
|
44
|
+
- lib/knowledge/knowledge_base.json
|
45
|
+
- lib/utils/cache_helper.rb
|
46
|
+
- lib/utils/hash_helper.rb
|
47
|
+
- lib/utils/sparql_queries.rb
|
48
|
+
- lib/utils/string_helper.rb
|
49
|
+
homepage:
|
50
|
+
licenses:
|
51
|
+
- MIT
|
52
|
+
metadata: {}
|
53
|
+
post_install_message:
|
54
|
+
rdoc_options: []
|
55
|
+
require_paths:
|
56
|
+
- lib
|
57
|
+
- results
|
58
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
59
|
+
requirements:
|
60
|
+
- - ">="
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: '0'
|
63
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
64
|
+
requirements:
|
65
|
+
- - ">"
|
66
|
+
- !ruby/object:Gem::Version
|
67
|
+
version: 1.3.1
|
68
|
+
requirements: []
|
69
|
+
rubyforge_project:
|
70
|
+
rubygems_version: 2.4.8
|
71
|
+
signing_key:
|
72
|
+
specification_version: 4
|
73
|
+
summary: Tool for entity sumarization.
|
74
|
+
test_files: []
|