browser_web_data_entity_sumarization 1.0.0beta1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,69 @@
1
+ #encoding: utf-8
2
+
3
+ # Module CacheHelper
4
module CacheHelper

  ###
  # Removes cached files from the application cache directory
  # ("#{Dir.tmpdir}/#{BrowserWebData::TMP_DIR}").
  #
  # @param [String] type File extension of the cache files to delete. Default is '.json'.
  #
  # @return [Array<String>] paths matched by the glob pattern
  def self.clear_cache(type = '.json')
    pattern = "#{Dir.tmpdir}/#{BrowserWebData::TMP_DIR}/*#{type}"
    Dir.glob(pattern).each { |path| FileUtils.rm_f(path) }
  end

  ##
  # Loads a value through a JSON file cache stored in the application cache directory.
  # The block is called (and its non-empty result written to the cache file) when
  # :demanded_reload is set, when the cache file does not exist, or when the file is
  # older than :ttl seconds. Otherwise the cached JSON file is parsed and returned.
  #
  # @param [String] key Cache key. It is sanitized by StringHelper.get_clear_file_path
  #   and used as the cache file name.
  # @param [Hash] params Load parameters
  # @option params [Hash] :json Optional. JSON parse attributes. Default is {symbolize_names: true}.
  # @option params [Numeric, nil] :ttl Optional. Time to live in seconds. Default is 0, so in
  #   practice an existing cache file is treated as expired on every call. Pass nil to skip
  #   the age check and keep the cache until :demanded_reload is set.
  # @option params [Boolean] :demanded_reload Optional. Forces the block to be called again. Default is false.
  # @option params [Boolean] :update Optional. Accepted for compatibility; not read by this method.
  #
  # @return [Hash] value with recursively symbolized keys. Empty Hash when a reload is
  #   needed but no block is given.
  #
  # @yield [Hash] an empty Hash; the block return value must be a Hash
  def self.load_cached(key, params = {}, &block)
    default_load_attrs = {
      update: false,
      json: {symbolize_names: true},
      ttl: 0,
      demanded_reload: false
    }
    params = default_load_attrs.merge(params)
    hash = {}

    cache_dir_path = "#{Dir.tmpdir}/#{BrowserWebData::TMP_DIR}"
    # mkdir_p is a no-op for an existing directory and also creates missing parents.
    FileUtils.mkdir_p(cache_dir_path)
    cache_file_path = "#{cache_dir_path}/#{StringHelper.get_clear_file_path(key)}.json"

    # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
    reload = params[:demanded_reload] ||
             !File.exist?(cache_file_path) ||
             (params[:ttl] && Time.now - File.ctime(cache_file_path) > params[:ttl])

    if reload
      if block_given?
        hash = yield hash
        # An empty result is not persisted, so the next call asks the block again.
        File.open(cache_file_path, 'w') { |f| f.puts hash.to_json } unless hash.empty?
      end
    else
      hash = JSON.parse(File.read(cache_file_path).force_encoding('UTF-8'), params[:json])
    end

    HashHelper.recursive_symbolize_keys(hash)
  end

  ###
  # Loads built-in knowledge by key from the "knowledge" directory that lies next to
  # the directory containing this file.
  #
  # @param [String] key Knowledge name; sanitized by StringHelper.get_clear_file_path
  #   and used as the JSON file name.
  #
  # @return [Hash, Array] parsed JSON with symbolized names
  #
  # @raise [Errno::ENOENT] when no knowledge file exists for the key
  def self.load_knowledge(key)
    dir_path = "#{File.dirname(File.expand_path('..', __FILE__))}/knowledge"
    file_path = "#{dir_path}/#{StringHelper.get_clear_file_path(key)}.json"

    JSON.parse(File.read(file_path), symbolize_names: true)
  end

end
@@ -0,0 +1,79 @@
1
+ #encoding: utf-8
2
+
3
+
4
+ # Module HashHelper
5
module HashHelper

  ##
  # Returns a new hash sorted by key.
  #
  # @param [Hash, nil] hash Input hash which will be sorted. nil is treated as an empty hash.
  # @param [Symbol, String] type Type of sorting, case-insensitive. One of [:asc, :desc].
  #   Default is :asc (ascending). Any other value returns the input unsorted.
  #
  # @return [Hash] sorted_hash
  def self.get_sorted(hash, type = :asc)
    hash ||= {}
    case type.to_s.downcase.to_sym
    when :asc
      Hash[hash.sort]
    when :desc
      # Operands are swapped so the pairs come out in descending order.
      Hash[hash.sort { |a, b| b <=> a }]
    else
      hash
    end
  end

  ##
  # Recursively symbolizes keys of a hash, descending into nested hashes and enumerables.
  #
  # @param [Hash, Enumerable] input_value Data to be symbolized.
  # @return [Hash, Enumerable] Data with keys converted by to_sym where the key supports it.
  def self.recursive_symbolize_keys(input_value)
    case input_value
    when Hash
      input_value.each_with_object({}) do |(k, v), result|
        new_key = k.respond_to?(:to_sym) ? k.to_sym : k
        result[new_key] = recursive_symbolize_keys(v)
      end
    when Enumerable
      input_value.map { |v| recursive_symbolize_keys(v) }
    else
      input_value
    end
  end

  ##
  # Recursively converts keys of a hash to strings, descending into nested hashes and enumerables.
  #
  # @param [Hash, Enumerable] input_value Data to be unsymbolized.
  # @return [Hash, Enumerable] Data with keys converted by to_s.
  def self.recursive_unsymbolize_keys(input_value)
    case input_value
    when Hash
      input_value.each_with_object({}) do |(k, v), result|
        new_key = k.respond_to?(:to_s) ? k.to_s : k
        result[new_key] = recursive_unsymbolize_keys(v)
      end
    when Enumerable
      input_value.map { |v| recursive_unsymbolize_keys(v) }
    else
      input_value
    end
  end

  ##
  # Collects all keys of a nested hash into one flat array, depth first:
  # each key is followed by the keys of its nested hash value.
  #
  # @param [Hash] data Possibly nested hash.
  # @return [Array] flat list of keys; empty array for an empty hash.
  def self.recursive_map_keys(data)
    # flat_map (rather than map + reduce(:+)) yields [] instead of nil for empty input.
    data.flat_map do |k, v|
      inner_array = v.is_a?(Hash) && !v.empty? ? recursive_map_keys(v) : []
      [k] + inner_array
    end
  end

end
@@ -0,0 +1,126 @@
1
+ # encoding: utf-8
2
+
3
# Builders of SPARQL query strings. In every builder an argument containing 'http'
# is wrapped as a full IRI (<...>); any other value is treated as a local name and
# prefixed (dbo: or the DBpedia resource namespace, depending on the method).
# Including the module also extends the including class, so the builders are
# available as both instance and class methods.
module SPARQLQueries

  # Query selecting entities of the given type with their rank value, highest rank first.
  #
  # @param [String] entity_type Class IRI or dbo: local name.
  # @param [Integer] limit Maximum number of rows. Default is 10.
  # @return [String] SPARQL query
  def resources_by_dbpedia_page_rank(entity_type, limit = 10)
    entity_type = entity_type['http'] ? "<#{entity_type}>" : "dbo:#{entity_type}"

    " PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>
      PREFIX dbo:<http://dbpedia.org/ontology/>
      PREFIX vrank:<http://purl.org/voc/vrank#>

      SELECT ?entity ?rank
      FROM <http://dbpedia.org>
      FROM <http://people.aifb.kit.edu/ath/#DBpedia_PageRank>
      WHERE {
        ?entity rdf:type #{entity_type}.
        ?entity vrank:hasRank/vrank:rankValue ?rank.
      }
      ORDER BY DESC(?rank) LIMIT #{limit}"
  end

  # Query selecting distinct predicates of statements that have the given object.
  #
  # @param [String] object Object IRI or dbo: local name.
  # @return [String] SPARQL query
  def all_predicates_by_object(object)
    object = object['http'] ? "<#{object}>" : "dbo:#{object}"

    " PREFIX dbo: <http://dbpedia.org/ontology/>
      PREFIX dbp: <http://dbpedia.org/property/>

      SELECT DISTINCT ?property

      WHERE {
        ?subject ?property #{object}.
      }"
  end

  # Query selecting distinct predicates of statements that have the given subject.
  #
  # @param [String] subject Subject IRI or dbo: local name.
  # @param [Boolean] only_literal When truthy, only statements with a literal object are matched.
  # @return [String] SPARQL query
  def all_predicates_by_subject(subject, only_literal)
    subject = subject['http'] ? "<#{subject}>" : "dbo:#{subject}"
    filter = only_literal ? 'FILTER(isLiteral(?object))' : nil

    " PREFIX dbo: <http://dbpedia.org/ontology/>
      PREFIX dbp: <http://dbpedia.org/property/>

      SELECT DISTINCT ?property

      WHERE {
        #{subject} ?property ?object.
        #{filter}
      }"
  end

  # Query selecting distinct predicates that connect the given subject to the given object.
  #
  # @param [String] subject Subject IRI or dbo: local name.
  # @param [String] object Object IRI or dbo: local name.
  # @return [String] SPARQL query
  def all_predicates_by_object_and_subject(subject, object)
    subject = subject['http'] ? "<#{subject}>" : "dbo:#{subject}"
    object = object['http'] ? "<#{object}>" : "dbo:#{object}"

    " PREFIX dbo: <http://dbpedia.org/ontology/>
      PREFIX dbp: <http://dbpedia.org/property/>

      SELECT DISTINCT ?property

      WHERE {
        #{subject} ?property #{object}.
      }"
  end

  # Query counting subjects of the given class that appear with the given predicate,
  # either as the subject or as the object of the statement.
  #
  # @param [String] entity_class Class IRI or dbo: local name.
  # @param [String] predicate Predicate IRI or dbo: local name.
  # @return [String] SPARQL query
  def count_predicate_by_entity(entity_class, predicate)
    entity_class = entity_class['http'] ? "<#{entity_class}>" : "dbo:#{entity_class}"
    predicate = predicate['http'] ? "<#{predicate}>" : "dbo:#{predicate}"

    " PREFIX dbo: <http://dbpedia.org/ontology/>
      PREFIX dbp: <http://dbpedia.org/property/>

      SELECT DISTINCT COUNT(?subject) as ?count

      WHERE {
        ?subject a #{entity_class} .
        {?subject #{predicate} ?a .} UNION {?b #{predicate} ?subject .}
      }

      ORDER BY DESC(?count)"
  end

  # Query counting distinct subjects that have the same object for all given predicates.
  #
  # @param [Array<String>, String] predicates One predicate or a list of predicates
  #   (IRIs or dbo: local names).
  # @return [String] SPARQL query
  def count_of_identical_predicates(predicates)
    predicates = [predicates] unless predicates.is_a?(Array)
    where_part = predicates.map { |predicate|
      predicate = predicate['http'] ? "<#{predicate}>" : "dbo:#{predicate}"
      "?subject #{predicate} ?object ."
    }.join("\n")

    " SELECT COUNT(DISTINCT ?subject) AS ?count
      WHERE{#{where_part}
      }"
  end

  # Query selecting predicates and values (with labels in the given language) of all
  # statements where the resource is the subject or the object.
  #
  # @param [String] resource Resource IRI or DBpedia resource local name.
  # @param [String] lang Language tag of the labels. Default is 'en'.
  # @return [String] SPARQL query
  def resource_properties(resource, lang = 'en')
    resource = resource['http'] ? "<#{resource}>" : "<http://dbpedia.org/resource/#{resource}>"

    " PREFIX dbo: <http://dbpedia.org/ontology/>
      PREFIX dbp: <http://dbpedia.org/property/>
      SELECT DISTINCT ?predicate, ?predicate_label, ?value, ?value_label
      WHERE {
        { #{resource} ?predicate ?value . } UNION { ?value ?predicate #{resource} . }

        OPTIONAL{
          ?value rdfs:label ?value_label .
          FILTER (lang(?value_label) = '#{lang}')
        }

        ?predicate rdfs:label ?predicate_label .
        FILTER (lang(?predicate_label) = '#{lang}')
      }"
  end

  # Query selecting distinct owl:Class classes of the given resource.
  #
  # @param [String] resource Resource IRI or DBpedia resource local name.
  # @return [String] SPARQL query
  def entity_classes(resource)
    # The local-name branch must close the IRI with '>' like the full-IRI branch does.
    resource = resource['http'] ? "<#{resource}>" : "<http://dbpedia.org/resource/#{resource}>"

    " SELECT DISTINCT ?entity_class
      WHERE {
        #{resource} a ?entity_class .
        ?entity_class a owl:Class .
      }"
  end

  # Hook: makes the query builders available on the including class itself as well.
  def self.included(base)
    base.extend SPARQLQueries
  end

end
@@ -0,0 +1,31 @@
1
+ #encoding: utf-8
2
+
3
+
4
+ # Module StringHelper
5
module StringHelper

  ##
  # Makes a string usable as a part of a file path by replacing each of the
  # problematic characters : / . * # with an underscore.
  #
  # @param [String] path Value to sanitize; converted with to_s first.
  #
  # @return [String] path
  def self.get_clear_file_path(path)
    text = path.to_s
    text.tr(':/.*#', '_')
  end

  ##
  # Converts a camel case string to a snake case one. Namespace separators (::)
  # become slashes and dashes become underscores.
  #
  # @param [String] string Value to convert; converted with to_s first.
  #
  # @return [String] snake_cased_string
  def self.get_snake_case(string)
    with_slashes = string.to_s.gsub('::', '/')
    # Split an acronym from the capitalized word that follows it.
    acronyms_split = with_slashes.gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2')
    # Split a lower case letter or digit from the capital that follows it.
    words_split = acronyms_split.gsub(/([a-z\d])([A-Z])/, '\1_\2')
    words_split.tr('-', '_').downcase
  end

end
metadata ADDED
@@ -0,0 +1,74 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: browser_web_data_entity_sumarization
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0beta1
5
+ platform: ruby
6
+ authors:
7
+ - Marek Filteš
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2017-04-12 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - '='
17
+ - !ruby/object:Gem::Version
18
+ version: 2.1.0
19
+ name: sparql-client
20
+ prerelease: false
21
+ type: :runtime
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '='
25
+ - !ruby/object:Gem::Version
26
+ version: 2.1.0
27
+ description:
28
+ email: marek.filtes@gmail.com
29
+ executables: []
30
+ extensions: []
31
+ extra_rdoc_files: []
32
+ files:
33
+ - lib/browser_web_data_entity_sumarization.rb
34
+ - lib/browser_web_data_entity_sumarization/entity_sumarization_nif_parser.rb
35
+ - lib/browser_web_data_entity_sumarization/entity_sumarization_predicate.rb
36
+ - lib/browser_web_data_entity_sumarization/entity_sumarization_predicates_similarity.rb
37
+ - lib/browser_web_data_entity_sumarization/entity_sumarization_statistics.rb
38
+ - lib/browser_web_data_entity_sumarization/sparql_request.rb
39
+ - lib/browser_web_data_entity_sumarization/version.rb
40
+ - lib/config/entity_sumarization_config.rb
41
+ - lib/knowledge/classes_hierarchy.json
42
+ - lib/knowledge/common_properties.json
43
+ - lib/knowledge/entity_classes.json
44
+ - lib/knowledge/knowledge_base.json
45
+ - lib/utils/cache_helper.rb
46
+ - lib/utils/hash_helper.rb
47
+ - lib/utils/sparql_queries.rb
48
+ - lib/utils/string_helper.rb
49
+ homepage:
50
+ licenses:
51
+ - MIT
52
+ metadata: {}
53
+ post_install_message:
54
+ rdoc_options: []
55
+ require_paths:
56
+ - lib
57
+ - results
58
+ required_ruby_version: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ version: '0'
63
+ required_rubygems_version: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - ">"
66
+ - !ruby/object:Gem::Version
67
+ version: 1.3.1
68
+ requirements: []
69
+ rubyforge_project:
70
+ rubygems_version: 2.4.8
71
+ signing_key:
72
+ specification_version: 4
73
+ summary: Tool for entity sumarization.
74
+ test_files: []