solis 0.97.0 → 0.99.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/solis/query/run.rb +255 -88
- data/lib/solis/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 9c57f5cfabe93ada0f5aa3c008ecafa1b130147dfc7ca5b77930362a330a3457
|
|
4
|
+
data.tar.gz: 7b9854033bb6e4677bde52c4557705f4f48adffe2e0988af33600890c4615984
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 6f527bc987723ee900f250c8361a7570df65e3f6238c8ccc560c65097f94e0941a43540222e35e26196f4a70ed94d193040d55f0b2455f38f8ea2b2c39466e2f
|
|
7
|
+
data.tar.gz: 404047145bf332a80ee0bee495640071137682f009a1a17de7e21b8d531a2b3cf57aec7c704a423366f778c2cc2259267ed1e67f638fc092c38858c611f72a81
|
data/lib/solis/query/run.rb
CHANGED
|
@@ -2,114 +2,281 @@ require 'solis/store/sparql/client'
|
|
|
2
2
|
require 'solis/config_file'
|
|
3
3
|
|
|
4
4
|
class Solis::Query::Runner
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
}
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
5
|
+
def self.run(entity, query, options = {})
|
|
6
|
+
result = {}
|
|
7
|
+
|
|
8
|
+
c = Solis::Store::Sparql::Client.new(Solis::Options.instance.get[:sparql_endpoint], graph_name: graph_name)
|
|
9
|
+
r = c.query(query, options)
|
|
10
|
+
|
|
11
|
+
if r.is_a?(SPARQL::Client)
|
|
12
|
+
result = direct_transform_with_embedding(r, entity, options)
|
|
13
|
+
else
|
|
14
|
+
t = r.map(&:to_h)
|
|
15
|
+
result = sanitize_result({'@graph' => t})
|
|
16
|
+
end
|
|
17
|
+
result
|
|
18
|
+
rescue StandardError => e
|
|
19
|
+
puts e.message
|
|
20
|
+
raise e
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
def self.direct_transform_with_embedding(client, entity, options = {})
|
|
24
|
+
results = client.query('select * where{?s ?p ?o}')
|
|
25
|
+
|
|
26
|
+
# Step 1: Group all triples by subject
|
|
27
|
+
grouped = group_by_subject(results)
|
|
28
|
+
|
|
29
|
+
# Step 2: Build objects index (without embedding yet)
|
|
30
|
+
objects_index = build_objects_index(grouped)
|
|
31
|
+
|
|
32
|
+
# Step 3: Embed references recursively
|
|
33
|
+
max_depth = options[:max_embed_depth] || 10
|
|
34
|
+
root_subjects = find_root_subjects(grouped, entity)
|
|
35
|
+
|
|
36
|
+
root_subjects.map do |subject|
|
|
37
|
+
embed_references(objects_index[subject], objects_index, max_depth, Set.new)
|
|
38
|
+
end.compact
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
private
|
|
42
|
+
|
|
43
|
+
def self.group_by_subject(results)
|
|
44
|
+
results.each_with_object({}) do |solution, acc|
|
|
45
|
+
subject = solution.s.to_s
|
|
46
|
+
acc[subject] ||= []
|
|
47
|
+
acc[subject] << { predicate: solution.p, object: solution.o }
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
def self.build_objects_index(grouped)
|
|
52
|
+
grouped.each_with_object({}) do |(subject, triples), index|
|
|
53
|
+
obj = {
|
|
54
|
+
'_id' => subject, # Full URI for resolution
|
|
55
|
+
'id' => nil, # Will be set from predicate if exists
|
|
56
|
+
'@subject' => subject, # Internal marker for reference resolution
|
|
57
|
+
'@type' => nil
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
triples.each do |triple|
|
|
61
|
+
predicate = triple[:predicate]
|
|
62
|
+
object = triple[:object]
|
|
63
|
+
|
|
64
|
+
# Handle rdf:type
|
|
65
|
+
if predicate.to_s =~ /type$/i || predicate == RDF::RDFV.type
|
|
66
|
+
obj['@type'] = object.to_s.split('/').last
|
|
67
|
+
next
|
|
25
68
|
end
|
|
26
69
|
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
70
|
+
# Get predicate name (last part of URI)
|
|
71
|
+
pred_name = predicate.to_s.split('/').last.underscore
|
|
72
|
+
|
|
73
|
+
# Extract value
|
|
74
|
+
value = if object.is_a?(RDF::URI)
|
|
75
|
+
{ '@ref' => object.to_s } # Mark as reference for later resolution
|
|
76
|
+
else
|
|
77
|
+
extract_value(object)
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
# Capture the 'id' predicate value specifically
|
|
81
|
+
if pred_name == 'id'
|
|
82
|
+
obj['id'] = value
|
|
83
|
+
next
|
|
30
84
|
end
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
85
|
+
|
|
86
|
+
# Handle multiple values for same predicate
|
|
87
|
+
if obj.key?(pred_name)
|
|
88
|
+
obj[pred_name] = [obj[pred_name]] unless obj[pred_name].is_a?(Array)
|
|
89
|
+
obj[pred_name] << value
|
|
90
|
+
else
|
|
91
|
+
obj[pred_name] = value
|
|
36
92
|
end
|
|
37
|
-
result = sanitize_result({'@graph' => t})
|
|
38
93
|
end
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
94
|
+
|
|
95
|
+
# Fallback: if no 'id' predicate was found, extract from URI
|
|
96
|
+
if obj['id'].nil?
|
|
97
|
+
obj['id'] = subject.split('/').last
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
if obj['@type'].nil?
|
|
101
|
+
obj['@type'] = subject.split('/')[-2].classify
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
index[subject] = obj
|
|
43
105
|
end
|
|
106
|
+
end
|
|
107
|
+
|
|
108
|
+
def self.find_root_subjects(grouped, entity)
|
|
109
|
+
# Find subjects that match the requested entity type
|
|
110
|
+
grouped.select do |subject, triples|
|
|
111
|
+
type_triple = triples.find { |t| t[:predicate].to_s =~ /type$/i || t[:predicate] == RDF::RDFV.type }
|
|
112
|
+
next false unless type_triple
|
|
44
113
|
|
|
45
|
-
|
|
114
|
+
type_name = type_triple[:object].to_s.split('/').last
|
|
115
|
+
type_name.downcase == entity.downcase ||
|
|
116
|
+
type_name.tableize == entity.tableize ||
|
|
117
|
+
type_name == entity
|
|
118
|
+
end.keys
|
|
119
|
+
end
|
|
46
120
|
|
|
47
|
-
|
|
48
|
-
|
|
121
|
+
def self.embed_references(obj, objects_index, max_depth, visited, current_depth = 0)
|
|
122
|
+
return nil if obj.nil?
|
|
49
123
|
|
|
50
|
-
|
|
124
|
+
subject = obj['@subject']
|
|
125
|
+
|
|
126
|
+
# At max depth, return minimal reference with both IDs
|
|
127
|
+
if current_depth >= max_depth
|
|
128
|
+
#return { '_id' => obj['_id'], 'id' => obj['id'], '@type' => obj['@type'] }
|
|
129
|
+
return { '_id' => obj['_id'], 'id' => obj['id'] }
|
|
51
130
|
end
|
|
52
131
|
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
132
|
+
# Circular reference detection
|
|
133
|
+
if visited.include?(subject)
|
|
134
|
+
# Return a reference object instead of embedding
|
|
135
|
+
#return { '_id' => obj['_id'], 'id' => obj['id'], '@type' => obj['@type'] }
|
|
136
|
+
return { '_id' => obj['_id'], 'id' => obj['id'] }
|
|
137
|
+
end
|
|
138
|
+
|
|
139
|
+
visited = visited.dup
|
|
140
|
+
visited.add(subject)
|
|
141
|
+
|
|
142
|
+
# Create clean copy without internal markers (except _id)
|
|
143
|
+
result = {
|
|
144
|
+
'_id' => obj['_id'],
|
|
145
|
+
'id' => obj['id']
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
obj.each do |key, value|
|
|
149
|
+
next if key.start_with?('@') # Skip internal markers
|
|
150
|
+
next if key == '_id' || key == 'id' # Already added
|
|
151
|
+
|
|
152
|
+
result[key] = resolve_value(value, objects_index, max_depth, visited, current_depth)
|
|
153
|
+
end
|
|
154
|
+
|
|
155
|
+
result
|
|
156
|
+
end
|
|
157
|
+
|
|
158
|
+
def self.resolve_value(value, objects_index, max_depth, visited, current_depth)
|
|
159
|
+
case value
|
|
160
|
+
when Array
|
|
161
|
+
value.map { |v| resolve_value(v, objects_index, max_depth, visited, current_depth) }
|
|
162
|
+
when Hash
|
|
163
|
+
if value.key?('@ref')
|
|
164
|
+
# This is a reference - try to embed it
|
|
165
|
+
ref_uri = value['@ref']
|
|
166
|
+
referenced_obj = objects_index[ref_uri]
|
|
167
|
+
|
|
168
|
+
if referenced_obj
|
|
169
|
+
embed_references(referenced_obj, objects_index, max_depth, visited, current_depth + 1)
|
|
170
|
+
else
|
|
171
|
+
# External reference - return both IDs
|
|
172
|
+
{ '_id' => ref_uri, 'id' => ref_uri.split('/').last }
|
|
173
|
+
end
|
|
174
|
+
else
|
|
175
|
+
# Regular hash - recurse
|
|
176
|
+
value.transform_values { |v| resolve_value(v, objects_index, max_depth, visited, current_depth) }
|
|
177
|
+
end
|
|
178
|
+
else
|
|
179
|
+
value
|
|
180
|
+
end
|
|
181
|
+
end
|
|
182
|
+
|
|
183
|
+
def self.extract_value(literal)
|
|
184
|
+
return literal.to_s if literal.is_a?(RDF::URI)
|
|
185
|
+
|
|
186
|
+
datatype = literal.datatype&.to_s
|
|
187
|
+
|
|
188
|
+
case datatype
|
|
189
|
+
when "http://www.w3.org/2001/XMLSchema#dateTime"
|
|
190
|
+
DateTime.parse(literal.value)
|
|
191
|
+
when "http://www.w3.org/2001/XMLSchema#date"
|
|
192
|
+
Date.parse(literal.value)
|
|
193
|
+
when "http://www.w3.org/2001/XMLSchema#boolean"
|
|
194
|
+
literal.value == "true"
|
|
195
|
+
when "http://www.w3.org/2001/XMLSchema#integer", "http://www.w3.org/2001/XMLSchema#int"
|
|
196
|
+
literal.value.to_i
|
|
197
|
+
when "http://www.w3.org/2001/XMLSchema#float", "http://www.w3.org/2001/XMLSchema#double", "http://www.w3.org/2001/XMLSchema#decimal"
|
|
198
|
+
literal.value.to_f
|
|
199
|
+
when "http://www.w3.org/2006/time#DateTimeInterval"
|
|
200
|
+
ISO8601::TimeInterval.parse(literal.value).to_s
|
|
201
|
+
when "http://www.w3.org/1999/02/22-rdf-syntax-ns#JSON"
|
|
202
|
+
JSON.parse(literal.value) rescue literal.value
|
|
203
|
+
else
|
|
204
|
+
# Handle language-tagged strings
|
|
205
|
+
if literal.respond_to?(:language) && literal.language
|
|
206
|
+
{ '@value' => literal.value, '@language' => literal.language.to_s }
|
|
207
|
+
else
|
|
208
|
+
literal.value
|
|
209
|
+
end
|
|
210
|
+
end
|
|
211
|
+
rescue StandardError => e
|
|
212
|
+
Solis::LOGGER.warn("Error extracting value: #{e.message}")
|
|
213
|
+
literal.to_s
|
|
214
|
+
end
|
|
215
|
+
|
|
216
|
+
def self.graph_name
|
|
217
|
+
Solis::Options.instance.get.key?(:graphs) ? Solis::Options.instance.get[:graphs].select { |s| s['type'].eql?(:main) }&.first['name'] : ''
|
|
218
|
+
end
|
|
219
|
+
|
|
220
|
+
# Keep original methods for backward compatibility
|
|
221
|
+
def self.sanitize_result(framed)
|
|
222
|
+
data = framed&.key?('@graph') ? framed['@graph'] : [framed]
|
|
223
|
+
sanitatize_data_in_result(data)
|
|
224
|
+
end
|
|
225
|
+
|
|
226
|
+
def self.sanitatize_data_in_result(data)
|
|
227
|
+
data.map do |d|
|
|
228
|
+
d.delete_if { |e| e =~ /^@/ }
|
|
229
|
+
if d.is_a?(Hash)
|
|
230
|
+
new_d = {}
|
|
231
|
+
d.each do |k, v|
|
|
232
|
+
if v.is_a?(Hash)
|
|
233
|
+
if v.key?('@type')
|
|
234
|
+
type = v['@type']
|
|
235
|
+
if v.key?('@value')
|
|
236
|
+
value = v['@value']
|
|
237
|
+
case type
|
|
238
|
+
when "http://www.w3.org/2001/XMLSchema#dateTime"
|
|
239
|
+
value = DateTime.parse(value)
|
|
240
|
+
when "http://www.w3.org/2001/XMLSchema#date"
|
|
241
|
+
value = Date.parse(value)
|
|
242
|
+
when "http://www.w3.org/2006/time#DateTimeInterval"
|
|
243
|
+
value = ISO8601::TimeInterval.parse(value)
|
|
244
|
+
when "http://www.w3.org/2001/XMLSchema#boolean"
|
|
245
|
+
value = value == "true"
|
|
75
246
|
end
|
|
76
|
-
v =
|
|
77
|
-
end
|
|
78
|
-
if v.is_a?(Hash)
|
|
79
|
-
new_d[k] = v.class.method_defined?(:value) ? v.value : sanitize_result(v)
|
|
80
|
-
else
|
|
81
|
-
new_d[k] = v.class.method_defined?(:value) ? v.value : v
|
|
247
|
+
v = value
|
|
82
248
|
end
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
249
|
+
v = sanitize_result(v) if v.is_a?(Hash)
|
|
250
|
+
end
|
|
251
|
+
if v.is_a?(Hash)
|
|
252
|
+
new_d[k] = v.class.method_defined?(:value) ? v.value : sanitize_result(v)
|
|
253
|
+
else
|
|
254
|
+
new_d[k] = v.class.method_defined?(:value) ? v.value : v
|
|
255
|
+
end
|
|
256
|
+
elsif v.is_a?(Array)
|
|
257
|
+
new_d[k] = []
|
|
258
|
+
v.each do |vt|
|
|
259
|
+
if vt.is_a?(Hash)
|
|
260
|
+
if vt.key?('@value')
|
|
261
|
+
new_d[k] << vt['@value']
|
|
92
262
|
else
|
|
93
263
|
new_d[k] << (vt.is_a?(String) ? vt : sanitize_result(vt))
|
|
94
264
|
end
|
|
265
|
+
else
|
|
266
|
+
new_d[k] << (vt.is_a?(String) ? vt : sanitize_result(vt))
|
|
95
267
|
end
|
|
96
|
-
new_d[k].flatten!
|
|
97
|
-
else
|
|
98
|
-
new_d[k] = v.class.method_defined?(:value) ? v.value : v
|
|
99
268
|
end
|
|
269
|
+
new_d[k].flatten!
|
|
270
|
+
else
|
|
271
|
+
new_d[k] = v.class.method_defined?(:value) ? v.value : v
|
|
100
272
|
end
|
|
101
|
-
d = new_d
|
|
102
273
|
end
|
|
103
|
-
|
|
104
|
-
d
|
|
274
|
+
d = new_d
|
|
105
275
|
end
|
|
106
|
-
|
|
107
|
-
Solis::LOGGER.error(e.message)
|
|
108
|
-
data
|
|
109
|
-
end
|
|
110
|
-
|
|
111
|
-
def self.graph_name
|
|
112
|
-
raise Solis::Error::NotFoundError, 'No graph name found' if Solis::Options.instance.get.key?(:graphs).nil?
|
|
113
|
-
Solis::Options.instance.get.key?(:graphs) ? Solis::Options.instance.get[:graphs].select{|s| s['type'].eql?(:main)}&.first['name'] : ''
|
|
276
|
+
d
|
|
114
277
|
end
|
|
278
|
+
rescue StandardError => e
|
|
279
|
+
Solis::LOGGER.error(e.message)
|
|
280
|
+
data
|
|
281
|
+
end
|
|
115
282
|
end
|
data/lib/solis/version.rb
CHANGED