triple_parser 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +43 -0
- data/lib/main.rb +16 -0
- data/lib/triple_parser/bracketed_url_splitter.rb +183 -0
- data/lib/triple_parser/colon_separated_splitter.rb +95 -0
- data/lib/triple_parser/regional_text_splitter.rb +41 -0
- data/lib/triple_parser/settings.rb +12 -0
- data/lib/triple_parser/splitter.rb +52 -0
- data/lib/triple_parser/t_maker.rb +63 -0
- data/lib/triple_parser/third.rb +17 -0
- data/lib/triple_parser/to_rdf.rb +133 -0
- data/lib/triple_parser/triple_set.rb +123 -0
- data/lib/triple_parser/unspecified_splitter.rb +27 -0
- data/lib/triple_parser/variable_splitter.rb +30 -0
- data/lib/triple_parser.rb +46 -0
- metadata +58 -0
data/README.md
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
Triple parser
|
2
|
+
=============
|
3
|
+
|
4
|
+
Converts text containing RDF triples between simple and complex versions
|
5
|
+
|
6
|
+
For example:
|
7
|
+
|
8
|
+
triples = <<EOF
|
9
|
+
id:112121212111111111111 owl:event:time id:234242342342334234242432
|
10
|
+
id:234242342342334234242432 rdf:type owl:timeline:Interval
|
11
|
+
id:234242342342334234242432 owl:timeline:beginsAtDateTime xml:date_time:'2010-02-15T12:00:00Z'
|
12
|
+
id:234242342342334234242432 owl:timeline:endsAtDateTime xml:date_time:'2010-02-17T12:00:00Z'
|
13
|
+
EOF
|
14
|
+
|
15
|
+
TripleParser.to_rdf(triples)
|
16
|
+
|
17
|
+
Outputs:
|
18
|
+
|
19
|
+
[
|
20
|
+
"<http://en.wikipedia.org/wiki/Triplestore/things/112121212111111111111#id> <http://purl.org/NET/c4dm/event.owl#time> <http://en.wikipedia.org/wiki/Triplestore/things/234242342342334234242432#id> .",
|
21
|
+
"<http://en.wikipedia.org/wiki/Triplestore/things/234242342342334234242432#id> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://purl.org/NET/c4dm/timeline.owl#Interval> .",
|
22
|
+
"<http://en.wikipedia.org/wiki/Triplestore/things/234242342342334234242432#id> <http://purl.org/NET/c4dm/timeline.owl#beginsAtDateTime> "2010-02-15T12:00:00Z"^^<http://www.w3.org/2001/XMLSchema#dateTime> .",
|
23
|
+
"<http://en.wikipedia.org/wiki/Triplestore/things/234242342342334234242432#id> <http://purl.org/NET/c4dm/timeline.owl#endsAtDateTime> "2010-02-17T12:00:00Z"^^<http://www.w3.org/2001/XMLSchema#dateTime> ."
|
24
|
+
]
|
25
|
+
|
26
|
+
Setting site specific application url
|
27
|
+
-------------------------------------
|
28
|
+
The default application url is 'en.wikipedia.org/wiki/Triplestore' where you can read more about triplestores.
|
29
|
+
To change this to your site specific url, use this (in a Rails initializer for example):
|
30
|
+
|
31
|
+
TripleParser::Settings.application_domain = 'undervale.co.uk'
|
32
|
+
|
33
|
+
Playground
|
34
|
+
----------
|
35
|
+
A simple Sinatra site is included, where you can enter triples and see how they are converted by TripleParser.to_rdf
|
36
|
+
|
37
|
+
To play:
|
38
|
+
|
39
|
+
ruby web.rb
|
40
|
+
|
41
|
+
The page can then be viewed at http://localhost:4567
|
42
|
+
|
43
|
+
Enter your triples in the text area and click submit. The output will appear below the text area
|
data/lib/main.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
require_relative 'triple_parser'
|
2
|
+
|
3
|
+
text = <<EOF
|
4
|
+
id:9108fe02-0bbb-4ed9-890f-b454877ce12c type_is Event
|
5
|
+
id:9108fe02-0bbb-4ed9-890f-b454877ce12c name_is string:'Troops tighten grip on Taliban stronghold'
|
6
|
+
id:9108fe02-0bbb-4ed9-890f-b454877ce12c has_time id:0237eb08-e4a5-463c-baaa-5a28f2b63707
|
7
|
+
<http://www.undervale.co.uk/things/9108fe02-0bbb-4ed9-890f-b454877ce12c#id> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://purl.org/NET/c4dm/event.owl#Event>.
|
8
|
+
EOF
|
9
|
+
|
10
|
+
TripleParser.input(text)
|
11
|
+
|
12
|
+
triples = TripleParser.triples
|
13
|
+
|
14
|
+
triples.each do |t|
|
15
|
+
p t.object.value
|
16
|
+
end
|
@@ -0,0 +1,183 @@
|
|
1
|
+
module TripleParser
|
2
|
+
|
3
|
+
require_relative 'splitter'
|
4
|
+
|
5
|
+
class BracketedUrlSplitter < Splitter
|
6
|
+
|
7
|
+
def self.can_split?(string)
|
8
|
+
bracketed_url_pattern =~ string
|
9
|
+
end
|
10
|
+
|
11
|
+
def rdf_style
|
12
|
+
'bracketed_url'
|
13
|
+
end
|
14
|
+
|
15
|
+
private
|
16
|
+
def self.bracketed_url_pattern
|
17
|
+
/\<http\:\/\/[\S]+\>/
|
18
|
+
end
|
19
|
+
|
20
|
+
def get_parts
|
21
|
+
type_value_from_bracketed_url.merge(
|
22
|
+
:url => get_url
|
23
|
+
)
|
24
|
+
end
|
25
|
+
|
26
|
+
def get_url
|
27
|
+
url_pattern = /http\:\/\/[\w\-\.]+\.[a-zA-Z]{2,7}(?:\/[\w\-\._]+)*/
|
28
|
+
match(url_pattern)[0]
|
29
|
+
end
|
30
|
+
|
31
|
+
|
32
|
+
def type_value_from_bracketed_url
|
33
|
+
if text_after_hash_pattern =~ self
|
34
|
+
type_value_from_text_after_hash_url
|
35
|
+
|
36
|
+
elsif resource_url_pattern =~ self
|
37
|
+
type_value_for_resource
|
38
|
+
|
39
|
+
elsif ontology_url_pattern =~ self
|
40
|
+
type_value_for_ontology
|
41
|
+
|
42
|
+
elsif dc_terms_pattern =~ self
|
43
|
+
type_value_for_dc_terms
|
44
|
+
|
45
|
+
elsif asset_pattern =~ self
|
46
|
+
type_value_for_asset
|
47
|
+
|
48
|
+
else
|
49
|
+
type_value_for_unknown_url
|
50
|
+
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
def text_after_hash_pattern
|
55
|
+
/\#([a-zA-Z]+)/
|
56
|
+
end
|
57
|
+
|
58
|
+
def after_hash
|
59
|
+
@after_hash ||= match(text_after_hash_pattern)[1] if match(text_after_hash_pattern)
|
60
|
+
end
|
61
|
+
|
62
|
+
def type_value_from_text_after_hash_url
|
63
|
+
|
64
|
+
|
65
|
+
if after_hash == 'id'
|
66
|
+
type_value_for_id_after_hash
|
67
|
+
|
68
|
+
elsif xml_data_pattern =~ self
|
69
|
+
type_value_for_xml_schema
|
70
|
+
|
71
|
+
elsif rdf_url_pattern =~ self
|
72
|
+
type_value_for_rdf
|
73
|
+
|
74
|
+
elsif owl_pattern =~ self
|
75
|
+
type_value_for_owl
|
76
|
+
|
77
|
+
else
|
78
|
+
{}
|
79
|
+
end
|
80
|
+
end
|
81
|
+
|
82
|
+
def xml_data_pattern
|
83
|
+
/["'](.+)["']\^{2}\<http/
|
84
|
+
end
|
85
|
+
|
86
|
+
def type_value_for_xml_schema
|
87
|
+
{
|
88
|
+
:type => "xml:#{underscore(after_hash)}",
|
89
|
+
:value => match(xml_data_pattern)[1]
|
90
|
+
}
|
91
|
+
end
|
92
|
+
|
93
|
+
def owl_pattern
|
94
|
+
/\.owl#/
|
95
|
+
end
|
96
|
+
|
97
|
+
def type_value_for_owl
|
98
|
+
type = match(text_before_hash_pattern)[1]
|
99
|
+
{
|
100
|
+
:type => "owl:#{type.gsub(/\.owl/, "")}",
|
101
|
+
:value => after_hash
|
102
|
+
}
|
103
|
+
end
|
104
|
+
|
105
|
+
def rdf_url_pattern
|
106
|
+
/rdf\-syntax\-ns/
|
107
|
+
end
|
108
|
+
|
109
|
+
def type_value_for_rdf
|
110
|
+
{
|
111
|
+
:type => 'rdf',
|
112
|
+
:value => after_hash
|
113
|
+
}
|
114
|
+
end
|
115
|
+
|
116
|
+
def type_value_for_id_after_hash
|
117
|
+
{
|
118
|
+
:type => 'id',
|
119
|
+
:value => match(text_before_hash_pattern)[1]
|
120
|
+
}
|
121
|
+
end
|
122
|
+
|
123
|
+
def text_before_hash_pattern
|
124
|
+
/([\w\-\._]*)\#/
|
125
|
+
end
|
126
|
+
|
127
|
+
def type_value_for_resource
|
128
|
+
{
|
129
|
+
:type => match(resource_url_pattern)[1],
|
130
|
+
:value => match(last_element_of_url_pattern)[1]
|
131
|
+
}
|
132
|
+
end
|
133
|
+
|
134
|
+
def ontology_url_pattern
|
135
|
+
/data\.press\.net\/ontology\/(?:\w+\/)+(\w+)/
|
136
|
+
end
|
137
|
+
|
138
|
+
def type_value_for_ontology
|
139
|
+
{
|
140
|
+
:type => 'ontology',
|
141
|
+
:value => match(ontology_url_pattern)[1]
|
142
|
+
}
|
143
|
+
end
|
144
|
+
|
145
|
+
def dc_terms_pattern
|
146
|
+
/\/dc\/terms\/([a-zA-Z_]+)/
|
147
|
+
end
|
148
|
+
|
149
|
+
def type_value_for_dc_terms
|
150
|
+
{
|
151
|
+
:type => 'dc:terms',
|
152
|
+
:value => match(dc_terms_pattern)[1]
|
153
|
+
}
|
154
|
+
end
|
155
|
+
|
156
|
+
def asset_pattern
|
157
|
+
/\/ontologies\/asset\/([a-zA-Z_]+)/
|
158
|
+
end
|
159
|
+
|
160
|
+
def type_value_for_asset
|
161
|
+
{
|
162
|
+
:type => 'asset',
|
163
|
+
:value => match(asset_pattern)[1]
|
164
|
+
}
|
165
|
+
end
|
166
|
+
|
167
|
+
def type_value_for_unknown_url
|
168
|
+
{
|
169
|
+
:url => get_url
|
170
|
+
}
|
171
|
+
end
|
172
|
+
|
173
|
+
def last_element_of_url_pattern
|
174
|
+
/\/([a-zA-Z_]+)\>/
|
175
|
+
end
|
176
|
+
|
177
|
+
def resource_url_pattern
|
178
|
+
/(#{resource_identifiers.join('|')})\/[a-zA-Z_]+\>/
|
179
|
+
end
|
180
|
+
|
181
|
+
end
|
182
|
+
|
183
|
+
end
|
@@ -0,0 +1,95 @@
|
|
1
|
+
module TripleParser
|
2
|
+
|
3
|
+
require_relative 'splitter'
|
4
|
+
|
5
|
+
class ColonSeparatedSplitter < Splitter
|
6
|
+
|
7
|
+
def self.can_split?(string)
|
8
|
+
colon_separated_rdf_pattern =~ string
|
9
|
+
end
|
10
|
+
|
11
|
+
def rdf_style
|
12
|
+
'colon_separated'
|
13
|
+
end
|
14
|
+
|
15
|
+
private
|
16
|
+
def self.colon_separated_rdf_pattern
|
17
|
+
colon_pair = '[a-z\-]+:[\w-]+' # this:example
|
18
|
+
colon_followed_by_quoted_string = %q{:(['"].+['"]|\w+)} # :'this example'
|
19
|
+
function = '\(.+\)' # (this, example)
|
20
|
+
colon_separated_text = "#{colon_pair}(#{colon_followed_by_quoted_string}|#{function})?" # this:example or this:example:'with string' or this:function(example)
|
21
|
+
|
22
|
+
Regexp.new(colon_separated_text)
|
23
|
+
end
|
24
|
+
|
25
|
+
def get_parts
|
26
|
+
if function_pattern =~ self
|
27
|
+
get_parts_for_function
|
28
|
+
|
29
|
+
elsif ontologies.include?(self)
|
30
|
+
get_ontology
|
31
|
+
|
32
|
+
elsif double_colon_first_elements.include?(before_colon)
|
33
|
+
get_double_colon_entry
|
34
|
+
|
35
|
+
elsif include?(':')
|
36
|
+
get_parts_for_type_value_pair
|
37
|
+
|
38
|
+
else
|
39
|
+
raise "Unable to get parts from '#{self}'"
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
def function_pattern
|
44
|
+
/^[\w_:]+\(.+\)/
|
45
|
+
end
|
46
|
+
|
47
|
+
# Splits a function-style entry such as "name(arg1, arg2)" into its name and
# its arguments.
#
# The text between the first '(' and the first ')' is split on commas and/or
# whitespace; each piece is passed through TMaker.brew and the results are
# stored in @arguments.
#
# Returns a hash with :type => 'function' and :value => the text before '('.
def get_parts_for_function
  open_at = index('(')
  close_at = index(')')
  raw_arguments = self[open_at + 1..close_at - 1].split(/[\,\s]+/)
  @arguments = raw_arguments.map { |argument| TMaker.brew(argument) }
  {
    :type => 'function',
    :value => self[0, open_at]
  }
end
|
55
|
+
|
56
|
+
def get_parts_for_type_value_pair
|
57
|
+
{:type => before_colon, :value => after_colon}
|
58
|
+
end
|
59
|
+
|
60
|
+
def before_colon
|
61
|
+
@before_colon ||= self[0, (index(':'))] if include?(':')
|
62
|
+
end
|
63
|
+
|
64
|
+
def after_colon
|
65
|
+
@after_colon ||= self[index(':') + 1..length] if include?(':')
|
66
|
+
end
|
67
|
+
|
68
|
+
# Removes one pair of matching quotes that wrap the whole of +text+.
#
# text - the candidate string, e.g. "'2010-02-15T12:00:00Z'".
#
# Returns the text without its first and last character when the entire
# string is wrapped in a matching pair of single or double quotes (with at
# least one character between them); otherwise returns +text+ unchanged.
#
# The patterns are anchored with \A and \z so only quotes bracketing the
# complete string count. Line anchors (^ and $) would also match a quoted
# line inside multi-line text and strip quotes from that interior line.
def remove_bracketing_quotes(text)
  wrapped = /\A'.+'\z/ =~ text || /\A".+"\z/ =~ text
  return text unless wrapped

  text[1...-1]
end
|
75
|
+
|
76
|
+
def double_colon_first_elements
|
77
|
+
%w{xml owl dc text}
|
78
|
+
end
|
79
|
+
|
80
|
+
def get_double_colon_entry
|
81
|
+
elements = split(':')
|
82
|
+
text_after_second_colon = elements[2..elements.length].join(':')
|
83
|
+
{
|
84
|
+
:type => "#{elements[0]}:#{elements[1]}",
|
85
|
+
:value => remove_bracketing_quotes(text_after_second_colon)
|
86
|
+
}
|
87
|
+
end
|
88
|
+
|
89
|
+
def get_ontology
|
90
|
+
{:type => 'ontology', :value => self}
|
91
|
+
end
|
92
|
+
|
93
|
+
end
|
94
|
+
|
95
|
+
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
module TripleParser
|
2
|
+
|
3
|
+
class RegionalTextSplitter < Splitter
|
4
|
+
|
5
|
+
def self.can_split?(string)
|
6
|
+
quoted_text_ampersand_and_language_identifier =~ string
|
7
|
+
end
|
8
|
+
|
9
|
+
def rdf_style
|
10
|
+
'regional_text'
|
11
|
+
end
|
12
|
+
|
13
|
+
private
|
14
|
+
def self.quoted_text_ampersand_and_language_identifier
|
15
|
+
/('.*'|".*")\@[\w\-_]+/
|
16
|
+
end
|
17
|
+
|
18
|
+
def get_parts
|
19
|
+
{
|
20
|
+
:type => "text:#{region}",
|
21
|
+
:value => text
|
22
|
+
}
|
23
|
+
end
|
24
|
+
|
25
|
+
def region
|
26
|
+
after_ampersand
|
27
|
+
end
|
28
|
+
|
29
|
+
# The literal text of a language-tagged string, without its bracketing quotes.
#
# When the part before the language tag is wrapped in a matching pair of
# single or double quotes, only that outer pair is removed, so quotes and
# apostrophes inside the literal (e.g. "it's") are preserved.
#
# For any other shape the previous behaviour is kept: every quote character
# is removed.
def text
  quoted = before_ampersand
  if /\A'.*'\z/m =~ quoted || /\A".*"\z/m =~ quoted
    quoted[1...-1]
  else
    quoted.gsub(/['"]/, "")
  end
end
|
32
|
+
|
33
|
+
def after_ampersand
|
34
|
+
split(/@/).last
|
35
|
+
end
|
36
|
+
|
37
|
+
# Everything that precedes the language tag delimiter.
#
# The delimiter is the LAST '@' in the string, so an '@' inside the quoted
# literal (for example an e-mail address) stays part of the returned text.
# When the string contains no '@' at all, the original split-based result is
# returned.
def before_ampersand
  delimiter_at = rindex('@')
  return split(/@/).first unless delimiter_at

  self[0...delimiter_at]
end
|
40
|
+
end
|
41
|
+
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
|
2
|
+
module TripleParser
|
3
|
+
class Splitter < String
|
4
|
+
|
5
|
+
attr_accessor :parts, :arguments, :rdf_style, :url
|
6
|
+
|
7
|
+
def self.can_split?(string)
|
8
|
+
raise "Need to define test to determine if string can be converted using this class"
|
9
|
+
end
|
10
|
+
|
11
|
+
def parts
|
12
|
+
@parts ||= get_parts
|
13
|
+
end
|
14
|
+
|
15
|
+
def type
|
16
|
+
parts[:type]
|
17
|
+
end
|
18
|
+
|
19
|
+
def value
|
20
|
+
parts[:value]
|
21
|
+
end
|
22
|
+
|
23
|
+
def url
|
24
|
+
parts[:url]
|
25
|
+
end
|
26
|
+
|
27
|
+
def rdf_style
|
28
|
+
raise "Need to define string that identifies rdf stype"
|
29
|
+
end
|
30
|
+
|
31
|
+
private
|
32
|
+
def get_parts
|
33
|
+
raise "Need to define how to split text input into seperate parts"
|
34
|
+
end
|
35
|
+
|
36
|
+
# Converts camelCase text to lower-case, underscore-separated text
# (e.g. "dateTime" becomes "date_time").
#
# An underscore is inserted wherever a lower-case ASCII letter is directly
# followed by an upper-case ASCII letter, then the result is downcased.
#
# text - the String to convert. It is NOT modified: callers pass in memoized
#        values (such as the text after the '#' of a URL), and inserting into
#        the argument in place would corrupt them and fail on frozen strings.
#
# Returns a new String.
def underscore(text)
  text.gsub(/([a-z])([A-Z])/, '\1_\2').downcase
end
|
42
|
+
|
43
|
+
def resource_identifiers
|
44
|
+
%w{resource domain}
|
45
|
+
end
|
46
|
+
|
47
|
+
def ontologies
|
48
|
+
%w{about mentions}
|
49
|
+
end
|
50
|
+
|
51
|
+
end
|
52
|
+
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
module TripleParser
|
2
|
+
class TMaker
|
3
|
+
require_relative 'third'
|
4
|
+
require_relative 'bracketed_url_splitter'
|
5
|
+
require_relative 'colon_separated_splitter'
|
6
|
+
require_relative 'variable_splitter'
|
7
|
+
require_relative 'unspecified_splitter'
|
8
|
+
require_relative 'regional_text_splitter'
|
9
|
+
|
10
|
+
attr_accessor :arguments, :rdf_style, :url
|
11
|
+
|
12
|
+
def self.brew(*args)
|
13
|
+
t_maker = new(*args)
|
14
|
+
t_maker.third
|
15
|
+
end
|
16
|
+
|
17
|
+
def initialize(*args)
|
18
|
+
@string = args.first
|
19
|
+
end
|
20
|
+
|
21
|
+
def third
|
22
|
+
begin
|
23
|
+
Third.new(
|
24
|
+
@string,
|
25
|
+
:type => split_text.type,
|
26
|
+
:value => split_text.value,
|
27
|
+
:url => split_text.url,
|
28
|
+
:rdf_style => split_text.rdf_style,
|
29
|
+
:arguments => split_text.arguments
|
30
|
+
)
|
31
|
+
rescue
|
32
|
+
Third.new(@string, :rdf_style => 'unknown')
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
def split_text
|
37
|
+
@split_text ||= get_split_text
|
38
|
+
end
|
39
|
+
|
40
|
+
private
|
41
|
+
def get_split_text
|
42
|
+
splitters.each do |splitter|
|
43
|
+
if splitter.can_split?(@string)
|
44
|
+
split_text = splitter.new(@string)
|
45
|
+
return split_text
|
46
|
+
end
|
47
|
+
end
|
48
|
+
raise "Unable to get parts for third from: '#{@string}'"
|
49
|
+
end
|
50
|
+
|
51
|
+
def splitters
|
52
|
+
[
|
53
|
+
BracketedUrlSplitter,
|
54
|
+
ColonSeparatedSplitter,
|
55
|
+
VariableSplitter,
|
56
|
+
RegionalTextSplitter,
|
57
|
+
UnspecifiedSplitter
|
58
|
+
]
|
59
|
+
end
|
60
|
+
|
61
|
+
|
62
|
+
end
|
63
|
+
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module TripleParser

  # One element of a triple (subject, predicate or object).
  #
  # Behaves as the original String it was built from, and additionally carries
  # the details extracted from that text: type, value, arguments, rdf_style
  # and url.
  class Third < String

    attr_accessor :type, :value, :arguments, :rdf_style, :url

    # string - the raw text of this element.
    # args   - optional Hash with any of the keys :type, :value, :arguments,
    #          :rdf_style and :url; keys that are absent leave the matching
    #          attribute nil.
    def initialize(string, args = {})
      @type, @value, @arguments, @rdf_style, @url =
        args.values_at(:type, :value, :arguments, :rdf_style, :url)
      super(string)
    end

  end
end
|
@@ -0,0 +1,133 @@
|
|
1
|
+
|
2
|
+
module TripleParser
|
3
|
+
class ToRdf
|
4
|
+
|
5
|
+
def initialize(third)
|
6
|
+
@third = third
|
7
|
+
end
|
8
|
+
|
9
|
+
def to_s
|
10
|
+
get_output.to_s
|
11
|
+
end
|
12
|
+
|
13
|
+
protected
|
14
|
+
def get_output
|
15
|
+
if third_type
|
16
|
+
pass_to_type_method
|
17
|
+
else
|
18
|
+
unknown
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
def third_type
|
23
|
+
@third_type ||= @third.type if @third.type
|
24
|
+
end
|
25
|
+
|
26
|
+
def pass_to_type_method
|
27
|
+
if type_if_known_colon_pair?
|
28
|
+
send(colon_prefix)
|
29
|
+
|
30
|
+
elsif method_exists_for?(third_type)
|
31
|
+
send(third_type)
|
32
|
+
|
33
|
+
else
|
34
|
+
unknown_type
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
def type_if_known_colon_pair?
|
39
|
+
if colon_pair_pattern =~ third_type
|
40
|
+
method_exists_for?(colon_prefix)
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
# Tells whether +name+ identifies a type handler that may be invoked via
# +send+.
#
# name - a type or colon prefix taken from the parsed input (String or
#        Symbol).
#
# The name comes from the text being parsed, so it must not be allowed to
# reach arbitrary methods. It is rejected when it is:
# * one of this class's internal plumbing methods (dispatching to them would
#   recurse endlessly or fail on missing state), or
# * a public/protected method every Object has (to_s, display, object_id, ...).
#
# Otherwise returns true when the class has a public or protected instance
# method of that name. Names are compared as strings so that unknown input is
# not converted to a symbol.
def method_exists_for?(name)
  handler = name.to_s
  plumbing = %w{
    get_output third_type pass_to_type_method type_if_known_colon_pair?
    method_exists_for? colon_pair_pattern colon_match colon_prefix colon_suffix
    unknown unknown_type camelcase
  }
  return false if plumbing.include?(handler)
  return false if Object.method_defined?(handler)

  self.class.instance_methods.any? { |method_name| method_name.to_s == handler }
end
|
47
|
+
|
48
|
+
# prefix:suffix
|
49
|
+
def colon_pair_pattern
|
50
|
+
/([a-zA-Z\-\_]+)\:([a-zA-Z\-\_]+)/
|
51
|
+
end
|
52
|
+
|
53
|
+
def colon_match
|
54
|
+
@colon_match ||= colon_pair_pattern.match(third_type)
|
55
|
+
end
|
56
|
+
|
57
|
+
def colon_prefix
|
58
|
+
@colon_prefix ||= colon_match[1]
|
59
|
+
end
|
60
|
+
|
61
|
+
def colon_suffix
|
62
|
+
@colon_suffix ||= colon_match[2]
|
63
|
+
end
|
64
|
+
|
65
|
+
def owl
|
66
|
+
"<http://purl.org/NET/c4dm/#{colon_suffix}.owl##{@third.value}>"
|
67
|
+
end
|
68
|
+
|
69
|
+
|
70
|
+
def xml
|
71
|
+
%Q{"#{@third.value}"^^<http://www.w3.org/2001/XMLSchema##{camelcase(colon_suffix)}>}
|
72
|
+
end
|
73
|
+
|
74
|
+
def dc
|
75
|
+
"<http://purl.org/dc/#{colon_suffix}/#{@third.value}>"
|
76
|
+
end
|
77
|
+
|
78
|
+
def text
|
79
|
+
%Q{"#{@third.value}"@#{colon_suffix}}
|
80
|
+
end
|
81
|
+
|
82
|
+
def unknown
|
83
|
+
@third
|
84
|
+
end
|
85
|
+
|
86
|
+
def unknown_type
|
87
|
+
"#{third_type}:#{@third.value}"
|
88
|
+
end
|
89
|
+
|
90
|
+
def id
|
91
|
+
"<http://#{Settings.application_domain}/things/#{@third.value}#id>"
|
92
|
+
end
|
93
|
+
|
94
|
+
def domain
|
95
|
+
"<http://#{Settings.application_domain}/ontologies/domain/name>"
|
96
|
+
end
|
97
|
+
|
98
|
+
def resource
|
99
|
+
"<http://dbpedia.org/resource/#{@third.value}>"
|
100
|
+
end
|
101
|
+
|
102
|
+
def ontology
|
103
|
+
"<http://data.press.net/ontology/tag/#{@third.value}>"
|
104
|
+
end
|
105
|
+
|
106
|
+
def asset
|
107
|
+
"<http://#{Settings.application_domain}/ontologies/asset/#{@third.value}>"
|
108
|
+
end
|
109
|
+
|
110
|
+
def function
|
111
|
+
arguments = @third.arguments.collect{|a| self.class.new(a)}
|
112
|
+
"#{@third.value}(#{arguments.join(' ')})"
|
113
|
+
end
|
114
|
+
|
115
|
+
def var
|
116
|
+
"?#{@third.value}"
|
117
|
+
end
|
118
|
+
|
119
|
+
def rdf
|
120
|
+
"<http://www.w3.org/1999/02/22-rdf-syntax-ns##{@third.value}>"
|
121
|
+
end
|
122
|
+
|
123
|
+
# Converts underscore-separated text to camelCase
# (e.g. "date_time" becomes "dateTime").
#
# Every underscore that is directly followed by a lower-case ASCII letter is
# removed and that letter is upper-cased. Underscores followed by anything
# else are left alone.
#
# text - the String to convert. It is NOT modified: the caller passes in a
#        memoized value, and editing it in place would change that cached
#        value and fail on frozen strings.
#
# Returns a new String.
def camelcase(text)
  text.gsub(/_([a-z])/) { Regexp.last_match(1).upcase }
end
|
131
|
+
|
132
|
+
end
|
133
|
+
end
|
@@ -0,0 +1,123 @@
|
|
1
|
+
module TripleParser
|
2
|
+
class TripleSet
|
3
|
+
require_relative 't_maker'
|
4
|
+
|
5
|
+
def initialize(triple)
|
6
|
+
@triple = triple
|
7
|
+
end
|
8
|
+
|
9
|
+
def parts
|
10
|
+
@parts ||= get_parts
|
11
|
+
end
|
12
|
+
|
13
|
+
def subject
|
14
|
+
@subject ||= parts[0]
|
15
|
+
end
|
16
|
+
|
17
|
+
def predicate
|
18
|
+
@predicate ||= parts[1]
|
19
|
+
end
|
20
|
+
|
21
|
+
def object
|
22
|
+
@object ||= (!parts[2] || parts[2].empty?) ? nil : parts[2]
|
23
|
+
end
|
24
|
+
|
25
|
+
private
|
26
|
+
def get_parts
|
27
|
+
match = pattern_to_split_triple.match(@triple)
|
28
|
+
matches = [1, 2, 3].collect{|i| match[i] if i != @skip_triple_part}.compact
|
29
|
+
matches.collect{|m| TMaker.brew(m)}
|
30
|
+
end
|
31
|
+
|
32
|
+
def pattern_to_split_triple
|
33
|
+
if triple_is_function?
|
34
|
+
@skip_triple_part = 3
|
35
|
+
pattern_to_split_function
|
36
|
+
else
|
37
|
+
triple_spitting_pattern
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
def triple_is_function?
|
42
|
+
function_pattern =~ @triple
|
43
|
+
end
|
44
|
+
|
45
|
+
def function_pattern
|
46
|
+
|
47
|
+
Regexp.new([
|
48
|
+
start_with_possible_white_space_pattern,
|
49
|
+
start_variable_or_bracketed_url_pattern,
|
50
|
+
receiving_variable_or_bracketed_url_pattern,
|
51
|
+
function_name_pattern,
|
52
|
+
function_arguments_pattern,
|
53
|
+
closing_white_space_or_period_pattern,
|
54
|
+
].join)
|
55
|
+
|
56
|
+
end
|
57
|
+
|
58
|
+
def pattern_to_split_function
|
59
|
+
|
60
|
+
receiving_element_pattern = '[\w?:\/_\-#<>\.]+'
|
61
|
+
|
62
|
+
Regexp.new(
|
63
|
+
[
|
64
|
+
'(',
|
65
|
+
receiving_element_pattern,
|
66
|
+
')',
|
67
|
+
spaces,
|
68
|
+
'(',
|
69
|
+
function_name_pattern,
|
70
|
+
function_arguments_pattern,
|
71
|
+
')'
|
72
|
+
].join
|
73
|
+
)
|
74
|
+
|
75
|
+
end
|
76
|
+
|
77
|
+
def triple_spitting_pattern
|
78
|
+
triple_containing_single_quoted_text = %q{\S*\'.*\'\S*}
|
79
|
+
triple_containing_double_quoted_text = %q{\S*\".*\"\S*}
|
80
|
+
text_not_split_by_spaces = '\S*'
|
81
|
+
triple = [triple_containing_single_quoted_text, triple_containing_double_quoted_text, text_not_split_by_spaces].join('|')
|
82
|
+
spaced_triples = Array.new(3, "(#{triple})").join('\s+')
|
83
|
+
Regexp.new(spaced_triples)
|
84
|
+
end
|
85
|
+
|
86
|
+
def spaces
|
87
|
+
'\s+'
|
88
|
+
end
|
89
|
+
|
90
|
+
def start_with_possible_white_space_pattern
|
91
|
+
'^\s*'
|
92
|
+
end
|
93
|
+
|
94
|
+
def start_variable_or_bracketed_url_pattern
|
95
|
+
'(\?|<http:\/\/)'
|
96
|
+
end
|
97
|
+
|
98
|
+
def receiving_variable_or_bracketed_url_pattern
|
99
|
+
'[\w\/\-_#\.]+>?\s+'
|
100
|
+
end
|
101
|
+
|
102
|
+
def basic_text_pattern
|
103
|
+
'\??[\w_\-:]+'
|
104
|
+
end
|
105
|
+
|
106
|
+
def function_name_pattern
|
107
|
+
'[\w_\-:]+'
|
108
|
+
end
|
109
|
+
|
110
|
+
# Regexp source for a bracketed function argument list, e.g. (?a, id:1 'x').
#
# The list must start with an argument character (word character, '?', ':',
# or a quote) and may continue with any mix of argument characters,
# whitespace and commas.
#
# This accepts exactly the same strings as the earlier nested form
# "(arguments+ separators*)+", but without a quantifier inside a quantified
# group, which could backtrack exponentially on long input that never closes
# its bracket. The single capture group is kept so the numbering of groups in
# the patterns built from this fragment is unchanged; it now captures the
# whole list rather than only the last argument.
def function_arguments_pattern
  %q{\(([\w_\?:"'][\w_\?:"'\s\,]*)\)}
end
|
113
|
+
|
114
|
+
def closing_white_space_or_period_pattern
|
115
|
+
'[\s\.]*$'
|
116
|
+
end
|
117
|
+
|
118
|
+
def standard_rdf_element_or_text_pattern
|
119
|
+
'(?:<.*>|[\w\?\-:]+)'
|
120
|
+
end
|
121
|
+
|
122
|
+
end
|
123
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module TripleParser
|
2
|
+
|
3
|
+
require_relative 'splitter'
|
4
|
+
|
5
|
+
class UnspecifiedSplitter < Splitter
|
6
|
+
|
7
|
+
def self.can_split?(string)
|
8
|
+
any_word_possibly_hyphenated =~ string
|
9
|
+
end
|
10
|
+
|
11
|
+
def rdf_style
|
12
|
+
'unspecified'
|
13
|
+
end
|
14
|
+
|
15
|
+
private
|
16
|
+
def self.any_word_possibly_hyphenated
|
17
|
+
/^\s*[\w\-]*\s*$/
|
18
|
+
end
|
19
|
+
|
20
|
+
def get_parts
|
21
|
+
{}
|
22
|
+
end
|
23
|
+
|
24
|
+
|
25
|
+
end
|
26
|
+
|
27
|
+
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module TripleParser
|
2
|
+
|
3
|
+
class VariableSplitter < Splitter
|
4
|
+
|
5
|
+
def self.can_split?(string)
|
6
|
+
any_word_starting_with_question_mark =~ string
|
7
|
+
end
|
8
|
+
|
9
|
+
def rdf_style
|
10
|
+
'variable'
|
11
|
+
end
|
12
|
+
|
13
|
+
private
|
14
|
+
def self.any_word_starting_with_question_mark
|
15
|
+
/^\s*\?[A-Za-z][\w\-]*\s*$/
|
16
|
+
end
|
17
|
+
|
18
|
+
def get_parts
|
19
|
+
{
|
20
|
+
:type => 'var',
|
21
|
+
:value => variable_name
|
22
|
+
}
|
23
|
+
end
|
24
|
+
|
25
|
+
# The variable's name: the text that follows the leading '?'.
#
# The pattern that recognises a variable allows whitespace around it, so the
# text is stripped first; otherwise padded input would keep its '?' (and any
# trailing whitespace) in the name.
def variable_name
  strip[1..-1]
end
|
28
|
+
|
29
|
+
end
|
30
|
+
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require_relative 'triple_parser/t_maker'
|
2
|
+
require_relative 'triple_parser/triple_set'
|
3
|
+
require_relative 'triple_parser/to_rdf'
|
4
|
+
require_relative 'triple_parser/settings'
|
5
|
+
|
6
|
+
|
7
|
+
module TripleParser
|
8
|
+
def self.input(new_input)
|
9
|
+
@input = new_input
|
10
|
+
end
|
11
|
+
|
12
|
+
# Builds a TripleSet for each triple in the current input.
#
# The input may be:
# * a String (or any String subclass) - one triple per line; lines that are
#   empty or contain only whitespace are skipped.
# * an Array (or any Array subclass) - one triple per element; nil elements
#   are skipped.
#
# The input is matched by class membership rather than by class name, so
# subclasses of String and Array are accepted too.
#
# Returns the Array of TripleSet objects, which is also stored in @triples.
# Raises RuntimeError ("Input format not recognised") for any other input.
def self.triples
  @triples = []
  entries =
    case @input
    when String
      @input.each_line.reject { |line| /^\s*$/ =~ line }
    when Array
      @input.compact
    else
      raise "Input format not recognised"
    end

  entries.each { |triple| @triples << TripleSet.new(triple) }
  @triples
end
|
30
|
+
|
31
|
+
def self.to_rdf(input)
|
32
|
+
@input = input
|
33
|
+
output = triples.collect do |t|
|
34
|
+
[
|
35
|
+
get_rdf_for(t.subject),
|
36
|
+
get_rdf_for(t.predicate),
|
37
|
+
get_rdf_for(t.object)
|
38
|
+
].join(' ') + " ."
|
39
|
+
end
|
40
|
+
return output
|
41
|
+
end
|
42
|
+
|
43
|
+
def self.get_rdf_for(third)
|
44
|
+
ToRdf.new(third).to_s if third
|
45
|
+
end
|
46
|
+
end
|
metadata
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: triple_parser
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.9
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Rob Nichols
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-08-07 00:00:00.000000000Z
|
13
|
+
dependencies: []
|
14
|
+
description: Triple Parser - Parses RDF triples and converts them into standard format
|
15
|
+
email: rob@undervale.co.uk
|
16
|
+
executables: []
|
17
|
+
extensions: []
|
18
|
+
extra_rdoc_files: []
|
19
|
+
files:
|
20
|
+
- README.md
|
21
|
+
- lib/main.rb
|
22
|
+
- lib/triple_parser.rb
|
23
|
+
- lib/triple_parser/triple_set.rb
|
24
|
+
- lib/triple_parser/variable_splitter.rb
|
25
|
+
- lib/triple_parser/third.rb
|
26
|
+
- lib/triple_parser/t_maker.rb
|
27
|
+
- lib/triple_parser/colon_separated_splitter.rb
|
28
|
+
- lib/triple_parser/splitter.rb
|
29
|
+
- lib/triple_parser/regional_text_splitter.rb
|
30
|
+
- lib/triple_parser/to_rdf.rb
|
31
|
+
- lib/triple_parser/unspecified_splitter.rb
|
32
|
+
- lib/triple_parser/bracketed_url_splitter.rb
|
33
|
+
- lib/triple_parser/settings.rb
|
34
|
+
homepage: https://github.com/reggieb/triple_parser
|
35
|
+
licenses: []
|
36
|
+
post_install_message:
|
37
|
+
rdoc_options: []
|
38
|
+
require_paths:
|
39
|
+
- lib
|
40
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
47
|
+
none: false
|
48
|
+
requirements:
|
49
|
+
- - ! '>='
|
50
|
+
- !ruby/object:Gem::Version
|
51
|
+
version: '0'
|
52
|
+
requirements: []
|
53
|
+
rubyforge_project:
|
54
|
+
rubygems_version: 1.8.10
|
55
|
+
signing_key:
|
56
|
+
specification_version: 3
|
57
|
+
summary: Triple Parser - Parses RDF triples and converts them into standard format
|
58
|
+
test_files: []
|