triple_parser 0.1.9

Sign up to get free protection for your applications and to get access to all the features.
data/README.md ADDED
@@ -0,0 +1,43 @@
1
+ Triple parser
2
+ =============
3
+
4
+ Converts text containing RDF triples between simple and complex versions
5
+
6
+ For example:
7
+
8
+ triples = <<EOF
9
+ id:112121212111111111111 owl:event:time id:234242342342334234242432
10
+ id:234242342342334234242432 rdf:type owl:timeline:Interval
11
+ id:234242342342334234242432 owl:timeline:beginsAtDateTime xml:date_time:'2010-02-15T12:00:00Z'
12
+ id:234242342342334234242432 owl:timeline:endsAtDateTime xml:date_time:'2010-02-17T12:00:00Z'
13
+ EOF
14
+
15
+ TripleParser.to_rdf(triples)
16
+
17
+ Outputs:
18
+
19
+ [
20
+ "<http://en.wikipedia.org/wiki/Triplestore/things/112121212111111111111#id> <http://purl.org/NET/c4dm/event.owl#time> <http://en.wikipedia.org/wiki/Triplestore/things/234242342342334234242432#id> .",
21
+ "<http://en.wikipedia.org/wiki/Triplestore/things/234242342342334234242432#id> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://purl.org/NET/c4dm/timeline.owl#Interval> .",
22
+ "<http://en.wikipedia.org/wiki/Triplestore/things/234242342342334234242432#id> <http://purl.org/NET/c4dm/timeline.owl#beginsAtDateTime> "2010-02-15T12:00:00Z"^^<http://www.w3.org/2001/XMLSchema#dateTime> .",
23
+ "<http://en.wikipedia.org/wiki/Triplestore/things/234242342342334234242432#id> <http://purl.org/NET/c4dm/timeline.owl#endsAtDateTime> "2010-02-17T12:00:00Z"^^<http://www.w3.org/2001/XMLSchema#dateTime> ."
24
+ ]
25
+
26
+ Setting site specific application url
27
+ -------------------------------------
28
+ The default application url is 'en.wikipedia.org/wiki/Triplestore' where you can read more about triplestores.
29
+ To change this to your site specific url, use this (in a Rails initializer for example):
30
+
31
+ TripleParser::Settings.application_domain = 'undervale.co.uk'
32
+
33
+ Playground
34
+ ----------
35
+ A simple Sinatra site is included, where you can enter triples and see how they are converted by TripleParser.to_rdf.
36
+
37
+ To play:
38
+
39
+ ruby web.rb
40
+
41
+ The page can then be viewed at http://localhost:4567
42
+
43
+ Enter your triples in the text area and click submit. The output will appear below the text area
data/lib/main.rb ADDED
@@ -0,0 +1,16 @@
1
+ require_relative 'triple_parser'
2
+
3
# Sample triples: three in shorthand form plus one written with bracketed URLs.
sample_triples = <<EOF
id:9108fe02-0bbb-4ed9-890f-b454877ce12c type_is Event
id:9108fe02-0bbb-4ed9-890f-b454877ce12c name_is string:'Troops tighten grip on Taliban stronghold'
id:9108fe02-0bbb-4ed9-890f-b454877ce12c has_time id:0237eb08-e4a5-463c-baaa-5a28f2b63707
<http://www.undervale.co.uk/things/9108fe02-0bbb-4ed9-890f-b454877ce12c#id> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://purl.org/NET/c4dm/event.owl#Event>.
EOF

# Hand the sample to the parser, then print the value of each triple's object.
TripleParser.input(sample_triples)

TripleParser.triples.each { |triple| p triple.object.value }
@@ -0,0 +1,183 @@
1
module TripleParser

  require_relative 'splitter'

  # Splitter for thirds written as bracketed URLs, for example
  #   <http://purl.org/NET/c4dm/event.owl#time>
  # or a typed literal such as
  #   "2010-02-15T12:00:00Z"^^<http://www.w3.org/2001/XMLSchema#dateTime>
  #
  # The type and value are derived from the shape of the URL; the bare URL
  # (without the brackets and fragment) is exposed through #url.
  class BracketedUrlSplitter < Splitter

    # Truthy when the string contains a <http://...> element.
    def self.can_split?(string)
      bracketed_url_pattern =~ string
    end

    # Kept outside the private section: `private` has no effect on methods
    # defined with `def self.`, so listing it there was misleading.
    def self.bracketed_url_pattern
      /\<http\:\/\/[\S]+\>/
    end

    def rdf_style
      'bracketed_url'
    end

    private

    # Type/value derived from the URL shape, combined with the bare URL.
    def get_parts
      type_value_from_bracketed_url.merge(
        :url => get_url
      )
    end

    # Returns the http URL up to (not including) any fragment, or nil when the
    # text holds no URL of the expected host.tld form. Returning nil instead of
    # calling [] on a failed match keeps the type/value that were recognised.
    def get_url
      url_pattern = /http\:\/\/[\w\-\.]+\.[a-zA-Z]{2,7}(?:\/[\w\-\._]+)*/
      found = match(url_pattern)
      found && found[0]
    end

    # Chooses the type/value builder by testing the URL against each known
    # pattern in turn; the first pattern that matches wins.
    def type_value_from_bracketed_url
      if text_after_hash_pattern =~ self
        type_value_from_text_after_hash_url

      elsif resource_url_pattern =~ self
        type_value_for_resource

      elsif ontology_url_pattern =~ self
        type_value_for_ontology

      elsif dc_terms_pattern =~ self
        type_value_for_dc_terms

      elsif asset_pattern =~ self
        type_value_for_asset

      else
        type_value_for_unknown_url

      end
    end

    def text_after_hash_pattern
      /\#([a-zA-Z]+)/
    end

    # The letters following the first '#', memoized; nil when there is none.
    def after_hash
      return @after_hash if @after_hash
      found = match(text_after_hash_pattern)
      @after_hash = found[1] if found
    end

    # Handles URLs that carry a fragment (#something). Returns an empty hash
    # when the fragment is present but none of the known forms apply.
    def type_value_from_text_after_hash_url
      if after_hash == 'id'
        type_value_for_id_after_hash

      elsif xml_data_pattern =~ self
        type_value_for_xml_schema

      elsif rdf_url_pattern =~ self
        type_value_for_rdf

      elsif owl_pattern =~ self
        type_value_for_owl

      else
        {}
      end
    end

    # Quoted data followed by ^^<http..., i.e. a typed literal.
    def xml_data_pattern
      /["'](.+)["']\^{2}\<http/
    end

    def type_value_for_xml_schema
      {
        # dup: the inherited underscore helper may modify the string it is
        # given, and after_hash is memoized.
        :type => "xml:#{underscore(after_hash.dup)}",
        :value => match(xml_data_pattern)[1]
      }
    end

    def owl_pattern
      /\.owl#/
    end

    # e.g. .../event.owl#time gives type 'owl:event' and value 'time'.
    def type_value_for_owl
      type = match(text_before_hash_pattern)[1]
      {
        :type => "owl:#{type.gsub(/\.owl/, "")}",
        :value => after_hash
      }
    end

    def rdf_url_pattern
      /rdf\-syntax\-ns/
    end

    def type_value_for_rdf
      {
        :type => 'rdf',
        :value => after_hash
      }
    end

    # For .../things/<identifier>#id the value is the text just before '#'.
    def type_value_for_id_after_hash
      {
        :type => 'id',
        :value => match(text_before_hash_pattern)[1]
      }
    end

    def text_before_hash_pattern
      /([\w\-\._]*)\#/
    end

    def type_value_for_resource
      {
        :type => match(resource_url_pattern)[1],
        :value => match(last_element_of_url_pattern)[1]
      }
    end

    def ontology_url_pattern
      /data\.press\.net\/ontology\/(?:\w+\/)+(\w+)/
    end

    def type_value_for_ontology
      {
        :type => 'ontology',
        :value => match(ontology_url_pattern)[1]
      }
    end

    def dc_terms_pattern
      /\/dc\/terms\/([a-zA-Z_]+)/
    end

    def type_value_for_dc_terms
      {
        :type => 'dc:terms',
        :value => match(dc_terms_pattern)[1]
      }
    end

    def asset_pattern
      /\/ontologies\/asset\/([a-zA-Z_]+)/
    end

    def type_value_for_asset
      {
        :type => 'asset',
        :value => match(asset_pattern)[1]
      }
    end

    # No type or value can be derived; only the URL is reported.
    def type_value_for_unknown_url
      {
        :url => get_url
      }
    end

    def last_element_of_url_pattern
      /\/([a-zA-Z_]+)\>/
    end

    # Matches <identifier>/<name>> where identifier is one of the
    # resource_identifiers supplied by Splitter.
    def resource_url_pattern
      /(#{resource_identifiers.join('|')})\/[a-zA-Z_]+\>/
    end

  end

end
@@ -0,0 +1,95 @@
1
module TripleParser

  require_relative 'splitter'

  # Splitter for the colon-separated shorthand, for example
  #   this:example
  #   this:example:'with string'
  #   this:function(example)
  class ColonSeparatedSplitter < Splitter

    # Truthy when the string contains a colon-separated element.
    def self.can_split?(string)
      colon_separated_rdf_pattern =~ string
    end

    # Kept outside the private section: `private` has no effect on methods
    # defined with `def self.`.
    def self.colon_separated_rdf_pattern
      colon_pair = '[a-z\-]+:[\w-]+'                                # this:example
      colon_followed_by_quoted_string = %q{:(['"].+['"]|\w+)}       # :'this example'
      function = '\(.+\)'                                           # (this, example)
      # this:example or this:example:'with string' or this:function(example)
      colon_separated_text = "#{colon_pair}(#{colon_followed_by_quoted_string}|#{function})?"

      Regexp.new(colon_separated_text)
    end

    def rdf_style
      'colon_separated'
    end

    private

    # Picks the parts builder for this text; raises when no form applies.
    def get_parts
      if function_pattern =~ self
        get_parts_for_function

      elsif ontologies.include?(self)
        get_ontology

      elsif double_colon_first_elements.include?(before_colon)
        get_double_colon_entry

      elsif include?(':')
        get_parts_for_type_value_pair

      else
        raise "Unable to get parts from '#{self}'"
      end
    end

    def function_pattern
      /^[\w_:]+\(.+\)/
    end

    # Splits name(arg, arg ...) into the function name and its arguments.
    # Each argument is itself converted with TMaker.brew and stored in
    # @arguments. Empty fragments (produced by whitespace or a comma directly
    # after the opening bracket) are dropped so they do not become arguments.
    def get_parts_for_function
      opening_bracket = index('(')
      closing_bracket = index(')')
      name = self[0, opening_bracket]
      raw_arguments = self[(opening_bracket + 1)..(closing_bracket - 1)].split(/[\,\s]+/)
      @arguments = raw_arguments.reject { |argument| argument.empty? }.collect { |argument| TMaker.brew(argument) }
      {:type => 'function', :value => name}
    end

    def get_parts_for_type_value_pair
      {:type => before_colon, :value => after_colon}
    end

    # Text before the first colon; nil when there is no colon.
    def before_colon
      @before_colon ||= self[0, (index(':'))] if include?(':')
    end

    # Text after the first colon; nil when there is no colon.
    def after_colon
      @after_colon ||= self[index(':') + 1..length] if include?(':')
    end

    # Strips one leading and one trailing quote when the text is wrapped in a
    # matching pair of single or double quotes; otherwise returns it unchanged.
    def remove_bracketing_quotes(text)
      if /^'.+'$/ =~ text || /^".+"$/ =~ text
        return text.gsub(/^['"]/, "").gsub(/['"]$/, "")
      else
        return text
      end
    end

    # Prefixes whose type is made of the first two colon-separated elements.
    def double_colon_first_elements
      %w{xml owl dc text}
    end

    # e.g. xml:date_time:'2010-02-15T12:00:00Z' gives type 'xml:date_time' and
    # the unquoted remainder as value (colons in the remainder are preserved).
    def get_double_colon_entry
      elements = split(':')
      text_after_second_colon = elements[2..elements.length].join(':')
      {
        :type => "#{elements[0]}:#{elements[1]}",
        :value => remove_bracketing_quotes(text_after_second_colon)
      }
    end

    def get_ontology
      {:type => 'ontology', :value => self}
    end

  end

end
@@ -0,0 +1,41 @@
1
module TripleParser

  # Loaded explicitly (as the sibling splitters do) so this file does not
  # depend on another file having defined Splitter first.
  require_relative 'splitter'

  # Splitter for quoted text followed by a language/region tag,
  # e.g. 'some text'@en or "some text"@en-gb.
  class RegionalTextSplitter < Splitter

    # Truthy when the string contains quoted text followed by @region.
    def self.can_split?(string)
      quoted_text_ampersand_and_language_identifier =~ string
    end

    # Kept outside the private section: `private` has no effect on methods
    # defined with `def self.`.
    def self.quoted_text_ampersand_and_language_identifier
      /('.*'|".*")\@[\w\-_]+/
    end

    def rdf_style
      'regional_text'
    end

    private

    def get_parts
      {
        :type => "text:#{region}",
        :value => text
      }
    end

    def region
      after_ampersand
    end

    # The text without its enclosing quotes. Only the surrounding pair is
    # removed, so apostrophes or quotes inside the text survive. When the text
    # is not wrapped in a matching pair, every quote character is removed.
    def text
      quoted = before_ampersand
      enclosed = /\A(['"])(.*)\1\z/m.match(quoted)
      enclosed ? enclosed[2] : quoted.gsub(/['"]/, "")
    end

    # Everything after the last '@' (the region tag).
    def after_ampersand
      split_at_last_at_sign.last
    end

    # Everything before the last '@', so an '@' inside the quoted text
    # (an e-mail address, say) does not truncate it.
    def before_ampersand
      split_at_last_at_sign.first
    end

    # Returns [before, after] around the last '@'. Without any '@' both
    # elements are the whole string.
    def split_at_last_at_sign
      before, separator, after = rpartition('@')
      separator.empty? ? [self, self] : [before, after]
    end
  end
end
@@ -0,0 +1,12 @@
1
+
2
module TripleParser
  # Site-wide configuration for TripleParser.
  #
  # The value is held in an instance variable of the Settings module itself
  # rather than in a class variable, so it cannot leak into or be clobbered by
  # anything that includes or extends this module.
  module Settings
    # The domain (and optional path) used when building application URLs.
    # Falls back to 'en.wikipedia.org/wiki/Triplestore' when nothing has been
    # set, or when it has been set to nil/false.
    def self.application_domain
      @application_domain ||= 'en.wikipedia.org/wiki/Triplestore'
    end

    # Sets the domain used when building application URLs,
    # e.g. TripleParser::Settings.application_domain = 'undervale.co.uk'
    def self.application_domain=(url)
      @application_domain = url
    end
  end
end
@@ -0,0 +1,52 @@
1
+
2
module TripleParser
  # Base class for the splitters. A splitter is the raw text of one element
  # of a triple (it is a String) that knows how to break itself into parts:
  # a type, a value and, where relevant, a url and function arguments.
  #
  # Subclasses must define self.can_split?, #rdf_style and the private
  # #get_parts, which returns a Hash with :type, :value and/or :url keys.
  class Splitter < String

    attr_accessor :arguments
    # Readers for these are defined explicitly below, so only the writers are
    # generated here (avoids defining each reader twice).
    attr_writer :parts, :rdf_style, :url

    # Subclasses return a truthy value when they can handle the string.
    def self.can_split?(string)
      raise "Need to define test to determine if string can be converted using this class"
    end

    # The Hash built by get_parts, computed on first use.
    def parts
      @parts ||= get_parts
    end

    def type
      parts[:type]
    end

    def value
      parts[:value]
    end

    def url
      parts[:url]
    end

    # Subclasses return a string naming the style of text they handle.
    def rdf_style
      raise "Need to define string that identifies rdf stype"
    end

    private

    def get_parts
      raise "Need to define how to split text input into seperate parts"
    end

    # Converts camelCase to snake_case, e.g. 'dateTime' => 'date_time'.
    # Returns a new string; the argument is left untouched (so frozen or
    # memoized strings can be passed safely).
    def underscore(text)
      text.gsub(/([a-z])([A-Z])/, '\1_\2').downcase
    end

    def resource_identifiers
      %w{resource domain}
    end

    def ontologies
      %w{about mentions}
    end

  end
end
@@ -0,0 +1,63 @@
1
module TripleParser
  # Builds a Third from a raw string by handing the string to the first
  # splitter class that reports it can split it.
  class TMaker
    require_relative 'third'
    require_relative 'bracketed_url_splitter'
    require_relative 'colon_separated_splitter'
    require_relative 'variable_splitter'
    require_relative 'unspecified_splitter'
    require_relative 'regional_text_splitter'

    attr_accessor :arguments, :rdf_style, :url

    # Convenience: TMaker.brew(string) is TMaker.new(string).third
    def self.brew(*args)
      new(*args).third
    end

    # Only the first argument (the text to convert) is used.
    def initialize(*args)
      @string = args.first
    end

    # The Third for the text. If no splitter accepts the text, or splitting
    # raises, a Third with rdf_style 'unknown' and no other parts is returned.
    def third
      source = split_text
      Third.new(
        @string,
        :type => source.type,
        :value => source.value,
        :url => source.url,
        :rdf_style => source.rdf_style,
        :arguments => source.arguments
      )
    rescue
      Third.new(@string, :rdf_style => 'unknown')
    end

    # The splitter instance wrapping the text, found on first use.
    def split_text
      @split_text ||= get_split_text
    end

    private

    # Instantiates the first splitter (in the order given by #splitters) whose
    # can_split? accepts the text; raises when none does.
    def get_split_text
      chosen = splitters.find { |candidate| candidate.can_split?(@string) }
      return chosen.new(@string) if chosen
      raise "Unable to get parts for third from: '#{@string}'"
    end

    # Candidate splitters, tried in this order.
    def splitters
      [
        BracketedUrlSplitter,
        ColonSeparatedSplitter,
        VariableSplitter,
        RegionalTextSplitter,
        UnspecifiedSplitter
      ]
    end

  end
end
@@ -0,0 +1,17 @@
1
module TripleParser

  # One element of a triple: the original text (this object is a String)
  # together with the parts that were extracted from it.
  class Third < String

    attr_accessor :type, :value, :arguments, :rdf_style, :url

    # string - the original text; nil is treated as an empty string so that a
    #          fallback Third can always be built.
    # args   - optional Hash with :type, :value, :arguments, :rdf_style, :url;
    #          nil is treated as an empty Hash.
    def initialize(string, args = {})
      args ||= {}
      @type = args[:type]
      @value = args[:value]
      @arguments = args[:arguments]
      @rdf_style = args[:rdf_style]
      @url = args[:url]
      super(string.to_s)
    end

  end
end
@@ -0,0 +1,133 @@
1
+
2
module TripleParser
  # Renders a Third (anything responding to type, value and, for functions,
  # arguments) as RDF text via #to_s.
  class ToRdf

    # The only methods that may be selected by a third's type. Dispatch is
    # restricted to this list because the type comes from parsed input: without
    # it a type such as 'to_s' would recurse forever and inherited methods such
    # as 'display' or 'freeze' could be invoked by name.
    TYPE_HANDLERS = %w{owl xml dc text id domain resource ontology asset function var rdf}.freeze

    def initialize(third)
      @third = third
    end

    def to_s
      get_output.to_s
    end

    protected

    # A third without a type is returned as it was given.
    def get_output
      if third_type
        pass_to_type_method
      else
        unknown
      end
    end

    def third_type
      @third_type ||= @third.type if @third.type
    end

    # For prefix:suffix types the prefix selects the handler; otherwise the
    # whole type does. Types without a handler are rendered as type:value.
    def pass_to_type_method
      if type_if_known_colon_pair?
        send(colon_prefix)

      elsif method_exists_for?(third_type)
        send(third_type)

      else
        unknown_type
      end
    end

    def type_if_known_colon_pair?
      if colon_pair_pattern =~ third_type
        method_exists_for?(colon_prefix)
      end
    end

    # True only for the handler names listed in TYPE_HANDLERS.
    def method_exists_for?(name)
      self.class::TYPE_HANDLERS.include?(name.to_s)
    end

    # prefix:suffix
    def colon_pair_pattern
      /([a-zA-Z\-\_]+)\:([a-zA-Z\-\_]+)/
    end

    def colon_match
      @colon_match ||= colon_pair_pattern.match(third_type)
    end

    def colon_prefix
      @colon_prefix ||= colon_match[1]
    end

    def colon_suffix
      @colon_suffix ||= colon_match[2]
    end

    def owl
      "<http://purl.org/NET/c4dm/#{colon_suffix}.owl##{@third.value}>"
    end

    # Typed literal; the suffix (e.g. date_time) becomes the XMLSchema type
    # name (dateTime).
    def xml
      %Q{"#{@third.value}"^^<http://www.w3.org/2001/XMLSchema##{camelcase(colon_suffix)}>}
    end

    def dc
      "<http://purl.org/dc/#{colon_suffix}/#{@third.value}>"
    end

    # Quoted text with the suffix as language/region tag.
    def text
      %Q{"#{@third.value}"@#{colon_suffix}}
    end

    def unknown
      @third
    end

    def unknown_type
      "#{third_type}:#{@third.value}"
    end

    def id
      "<http://#{Settings.application_domain}/things/#{@third.value}#id>"
    end

    def domain
      "<http://#{Settings.application_domain}/ontologies/domain/name>"
    end

    def resource
      "<http://dbpedia.org/resource/#{@third.value}>"
    end

    def ontology
      "<http://data.press.net/ontology/tag/#{@third.value}>"
    end

    def asset
      "<http://#{Settings.application_domain}/ontologies/asset/#{@third.value}>"
    end

    # name(arg arg ...), each argument rendered by its own ToRdf.
    def function
      arguments = @third.arguments.collect { |argument| self.class.new(argument).to_s }
      "#{@third.value}(#{arguments.join(' ')})"
    end

    def var
      "?#{@third.value}"
    end

    def rdf
      "<http://www.w3.org/1999/02/22-rdf-syntax-ns##{@third.value}>"
    end

    # Converts snake_case to camelCase, e.g. 'date_time' => 'dateTime'.
    # Returns a new string; the argument (which may be memoized) is untouched.
    def camelcase(text)
      text.gsub(/_([a-z])/) { $1.upcase }
    end

  end
end
@@ -0,0 +1,123 @@
1
module TripleParser
  # One line of input split into its parts. Each part is converted with
  # TMaker.brew. An ordinary triple yields subject, predicate and object; a
  # function line (an element followed by name(arguments)) yields two parts,
  # so #object is nil.
  class TripleSet
    require_relative 't_maker'

    def initialize(triple)
      @triple = triple
    end

    # The converted parts, computed on first use.
    def parts
      @parts ||= get_parts
    end

    def subject
      @subject ||= parts[0]
    end

    def predicate
      @predicate ||= parts[1]
    end

    # nil when there is no third part or it is empty.
    def object
      @object ||= (!parts[2] || parts[2].empty?) ? nil : parts[2]
    end

    private

    # Splits the line with the appropriate pattern and converts each captured
    # part. Raises a descriptive error when the line cannot be split, instead
    # of failing with a NoMethodError on a nil match.
    def get_parts
      match = pattern_to_split_triple.match(@triple)
      raise "Unable to split triple into parts: '#{@triple.to_s.strip}'" unless match
      matches = [1, 2, 3].collect{|i| match[i] if i != @skip_triple_part}.compact
      matches.collect{|m| TMaker.brew(m)}
    end

    # Chooses the splitting pattern. For function lines capture 3 is skipped:
    # it is the capture group inside function_arguments_pattern, not a part.
    def pattern_to_split_triple
      if triple_is_function?
        @skip_triple_part = 3
        pattern_to_split_function
      else
        triple_spitting_pattern
      end
    end

    def triple_is_function?
      function_pattern =~ @triple
    end

    # Matches a whole line made of a variable (?name) or bracketed http URL,
    # whitespace, then a function name with a bracketed argument list, with
    # only whitespace or periods after it.
    def function_pattern

      Regexp.new([
        start_with_possible_white_space_pattern,
        start_variable_or_bracketed_url_pattern,
        receiving_variable_or_bracketed_url_pattern,
        function_name_pattern,
        function_arguments_pattern,
        closing_white_space_or_period_pattern,
      ].join)

    end

    # Capture 1: the receiving element. Capture 2: the function name together
    # with its bracketed arguments.
    def pattern_to_split_function

      receiving_element_pattern = '[\w?:\/_\-#<>\.]+'

      Regexp.new(
        [
          '(',
          receiving_element_pattern,
          ')',
          spaces,
          '(',
          function_name_pattern,
          function_arguments_pattern,
          ')'
        ].join
      )

    end

    # Three captures separated by whitespace. Each capture is either text
    # containing a quoted section (which may itself contain spaces) or a run
    # of non-space characters.
    def triple_spitting_pattern
      triple_containing_single_quoted_text = %q{\S*\'.*\'\S*}
      triple_containing_double_quoted_text = %q{\S*\".*\"\S*}
      text_not_split_by_spaces = '\S*'
      triple = [triple_containing_single_quoted_text, triple_containing_double_quoted_text, text_not_split_by_spaces].join('|')
      spaced_triples = Array.new(3, "(#{triple})").join('\s+')
      Regexp.new(spaced_triples)
    end

    def spaces
      '\s+'
    end

    def start_with_possible_white_space_pattern
      '^\s*'
    end

    def start_variable_or_bracketed_url_pattern
      '(\?|<http:\/\/)'
    end

    def receiving_variable_or_bracketed_url_pattern
      '[\w\/\-_#\.]+>?\s+'
    end

    def basic_text_pattern
      '\??[\w_\-:]+'
    end

    def function_name_pattern
      '[\w_\-:]+'
    end

    def function_arguments_pattern
      %q{\(([\w_\?:"']+[\s\,]*)+\)}
    end

    def closing_white_space_or_period_pattern
      '[\s\.]*$'
    end

    def standard_rdf_element_or_text_pattern
      '(?:<.*>|[\w\?\-:]+)'
    end

  end
end
@@ -0,0 +1,27 @@
1
module TripleParser

  require_relative 'splitter'

  # Splitter for a bare word (optionally hyphenated) that carries no type
  # information. It contributes no parts, only the 'unspecified' rdf_style.
  class UnspecifiedSplitter < Splitter

    # Truthy when the whole string is a single word, optionally surrounded by
    # whitespace.
    def self.can_split?(string)
      any_word_possibly_hyphenated =~ string
    end

    # Anchored with \A and \z so the whole string is tested. With ^ and $ a
    # multi-line string was accepted as soon as any one of its lines was blank
    # or a single word.
    def self.any_word_possibly_hyphenated
      /\A\s*[\w\-]*\s*\z/
    end

    def rdf_style
      'unspecified'
    end

    private

    # No type, value or url can be derived from a bare word.
    def get_parts
      {}
    end

  end

end
@@ -0,0 +1,30 @@
1
module TripleParser

  # Loaded explicitly (as the sibling splitters do) so this file does not
  # depend on another file having defined Splitter first.
  require_relative 'splitter'

  # Splitter for variables: a word starting with a question mark, e.g. ?name
  class VariableSplitter < Splitter

    # Truthy when the whole string is ?name, optionally surrounded by
    # whitespace.
    def self.can_split?(string)
      any_word_starting_with_question_mark =~ string
    end

    # Anchored with \A and \z so the whole string is tested rather than any
    # single line of it.
    def self.any_word_starting_with_question_mark
      /\A\s*\?[A-Za-z][\w\-]*\s*\z/
    end

    def rdf_style
      'variable'
    end

    private

    def get_parts
      {
        :type => 'var',
        :value => variable_name
      }
    end

    # The name without the leading '?'. Surrounding whitespace, which
    # can_split? permits, is removed first so that it neither shifts the '?'
    # into the name nor trails after it.
    def variable_name
      strip[1..-1]
    end

  end
end
@@ -0,0 +1,46 @@
1
+ require_relative 'triple_parser/t_maker'
2
+ require_relative 'triple_parser/triple_set'
3
+ require_relative 'triple_parser/to_rdf'
4
+ require_relative 'triple_parser/settings'
5
+
6
+
7
module TripleParser
  # Stores the text to be parsed by a later call to TripleParser.triples.
  # Accepts a String (one triple per line) or an Array of triple strings.
  def self.input(new_input)
    @input = new_input
  end

  # Builds a TripleSet for each triple in the stored input and returns them
  # as an Array.
  #
  # String input is read line by line and blank lines are skipped; Array
  # input has nil entries removed. The input is matched by class (so
  # subclasses of String and Array are accepted too) rather than by class
  # name. Raises a RuntimeError for any other kind of input.
  def self.triples
    @triples = []
    case @input
    when String
      @input.each_line do |triple|
        next if /^\s*$/ =~ triple
        @triples << TripleSet.new(triple)
      end
    when Array
      @input.compact.each do |triple|
        @triples << TripleSet.new(triple)
      end
    else
      raise "Input format not recognised"
    end

    @triples
  end

  # Parses the input and returns an Array with one string per triple: the
  # converted subject, predicate and object joined by spaces and followed
  # by " .".
  def self.to_rdf(input)
    @input = input
    triples.collect do |t|
      [
        get_rdf_for(t.subject),
        get_rdf_for(t.predicate),
        get_rdf_for(t.object)
      ].join(' ') + " ."
    end
  end

  # The converted text for one element of a triple; nil when the element is
  # missing.
  def self.get_rdf_for(third)
    ToRdf.new(third).to_s if third
  end
end
metadata ADDED
@@ -0,0 +1,58 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: triple_parser
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.9
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Rob Nichols
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-08-07 00:00:00.000000000Z
13
+ dependencies: []
14
+ description: Triple Parser - Parses RDF triples and converts them into standard format
15
+ email: rob@undervale.co.uk
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - README.md
21
+ - lib/main.rb
22
+ - lib/triple_parser.rb
23
+ - lib/triple_parser/triple_set.rb
24
+ - lib/triple_parser/variable_splitter.rb
25
+ - lib/triple_parser/third.rb
26
+ - lib/triple_parser/t_maker.rb
27
+ - lib/triple_parser/colon_separated_splitter.rb
28
+ - lib/triple_parser/splitter.rb
29
+ - lib/triple_parser/regional_text_splitter.rb
30
+ - lib/triple_parser/to_rdf.rb
31
+ - lib/triple_parser/unspecified_splitter.rb
32
+ - lib/triple_parser/bracketed_url_splitter.rb
33
+ - lib/triple_parser/settings.rb
34
+ homepage: https://github.com/reggieb/triple_parser
35
+ licenses: []
36
+ post_install_message:
37
+ rdoc_options: []
38
+ require_paths:
39
+ - lib
40
+ required_ruby_version: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ required_rubygems_version: !ruby/object:Gem::Requirement
47
+ none: false
48
+ requirements:
49
+ - - ! '>='
50
+ - !ruby/object:Gem::Version
51
+ version: '0'
52
+ requirements: []
53
+ rubyforge_project:
54
+ rubygems_version: 1.8.10
55
+ signing_key:
56
+ specification_version: 3
57
+ summary: Triple Parser - Parses RDF triples and converts them into standard format
58
+ test_files: []