triple_parser 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md ADDED
@@ -0,0 +1,43 @@
1
+ Triple parser
2
+ =============
3
+
4
+ Converts text containing RDF triples between simple and complex versions
5
+
6
+ For example:
7
+
8
+ triples = <<EOF
9
+ id:112121212111111111111 owl:event:time id:234242342342334234242432
10
+ id:234242342342334234242432 rdf:type owl:timeline:Interval
11
+ id:234242342342334234242432 owl:timeline:beginsAtDateTime xml:date_time:'2010-02-15T12:00:00Z'
12
+ id:234242342342334234242432 owl:timeline:endsAtDateTime xml:date_time:'2010-02-17T12:00:00Z'
13
+ EOF
14
+
15
+ TripleParser.to_rdf(triples)
16
+
17
+ Outputs:
18
+
19
+ [
20
+ "<http://en.wikipedia.org/wiki/Triplestore/things/112121212111111111111#id> <http://purl.org/NET/c4dm/event.owl#time> <http://en.wikipedia.org/wiki/Triplestore/things/234242342342334234242432#id> .",
21
+ "<http://en.wikipedia.org/wiki/Triplestore/things/234242342342334234242432#id> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://purl.org/NET/c4dm/timeline.owl#Interval> .",
22
+ "<http://en.wikipedia.org/wiki/Triplestore/things/234242342342334234242432#id> <http://purl.org/NET/c4dm/timeline.owl#beginsAtDateTime> "2010-02-15T12:00:00Z"^^<http://www.w3.org/2001/XMLSchema#dateTime> .",
23
+ "<http://en.wikipedia.org/wiki/Triplestore/things/234242342342334234242432#id> <http://purl.org/NET/c4dm/timeline.owl#endsAtDateTime> "2010-02-17T12:00:00Z"^^<http://www.w3.org/2001/XMLSchema#dateTime> ."
24
+ ]
25
+
26
+ Setting site specific application url
27
+ -------------------------------------
28
+ The default application url is 'en.wikipedia.org/wiki/Triplestore' where you can read more about triplestores.
29
+ To change this to your site specific url, use this (in a Rails initializer for example):
30
+
31
+ TripleParser::Settings.application_domain = 'undervale.co.uk'
32
+
33
+ Playground
34
+ ----------
35
+ A simple Sinatra site is included, where you can enter triples and see how they are converted by TripleParser.to_rdf
36
+
37
+ To play:
38
+
39
+ ruby web.rb
40
+
41
+ The page can then be viewed at http://localhost:4567
42
+
43
+ Enter your triples in the text area and click submit. The output will appear below the text area
data/lib/main.rb ADDED
@@ -0,0 +1,16 @@
1
+ require_relative 'triple_parser'
2
+
3
# Sample input: three colon-separated triples followed by one triple written
# as bracketed URLs.
text = <<EOF
id:9108fe02-0bbb-4ed9-890f-b454877ce12c type_is Event
id:9108fe02-0bbb-4ed9-890f-b454877ce12c name_is string:'Troops tighten grip on Taliban stronghold'
id:9108fe02-0bbb-4ed9-890f-b454877ce12c has_time id:0237eb08-e4a5-463c-baaa-5a28f2b63707
<http://www.undervale.co.uk/things/9108fe02-0bbb-4ed9-890f-b454877ce12c#id> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://purl.org/NET/c4dm/event.owl#Event>.
EOF

TripleParser.input(text)

triples = TripleParser.triples

# Print the parsed value of the object of every triple.
triples.each { |triple| p triple.object.value }
@@ -0,0 +1,183 @@
1
module TripleParser

  require_relative 'splitter'

  # Splitter for thirds written as bracketed URLs, e.g. <http://host/path#fragment>.
  # The type and value are worked out from recognisable parts of the URL and the
  # bare URL is stored under :url.
  class BracketedUrlSplitter < Splitter

    # Truthy when the string contains a bracketed http URL.
    def self.can_split?(string)
      bracketed_url_pattern =~ string
    end

    def rdf_style
      'bracketed_url'
    end

    private

    # NOTE: `private` has no effect on singleton methods, so this stays public.
    def self.bracketed_url_pattern
      /\<http\:\/\/[\S]+\>/
    end

    # Type/value hash for this URL, with the URL itself added as :url.
    def get_parts
      type_and_value = type_value_from_bracketed_url
      type_and_value.merge(:url => get_url)
    end

    # The http URL found in the text (without the surrounding brackets or fragment).
    def get_url
      match(/http\:\/\/[\w\-\.]+\.[a-zA-Z]{2,7}(?:\/[\w\-\._]+)*/)[0]
    end

    # Tries each known URL shape in turn; the first pattern that matches wins.
    def type_value_from_bracketed_url
      return type_value_from_text_after_hash_url if text_after_hash_pattern =~ self
      return type_value_for_resource if resource_url_pattern =~ self
      return type_value_for_ontology if ontology_url_pattern =~ self
      return type_value_for_dc_terms if dc_terms_pattern =~ self
      return type_value_for_asset if asset_pattern =~ self

      type_value_for_unknown_url
    end

    def text_after_hash_pattern
      /\#([a-zA-Z]+)/
    end

    # The letters following the '#', or nil when the text has no such fragment.
    def after_hash
      fragment = match(text_after_hash_pattern)
      return unless fragment

      @after_hash ||= fragment[1]
    end

    # Handles URLs that carry a fragment; an unrecognised fragment gives an empty hash.
    def type_value_from_text_after_hash_url
      return type_value_for_id_after_hash if after_hash == 'id'
      return type_value_for_xml_schema if xml_data_pattern =~ self
      return type_value_for_rdf if rdf_url_pattern =~ self
      return type_value_for_owl if owl_pattern =~ self

      {}
    end

    # Quoted data followed by ^^ and the start of a bracketed URL.
    def xml_data_pattern
      /["'](.+)["']\^{2}\<http/
    end

    def type_value_for_xml_schema
      {:type => "xml:#{underscore(after_hash)}", :value => match(xml_data_pattern)[1]}
    end

    def owl_pattern
      /\.owl#/
    end

    # The type is the name in front of the '#' with its '.owl' suffix removed.
    def type_value_for_owl
      ontology_file = match(text_before_hash_pattern)[1]
      {:type => "owl:#{ontology_file.gsub(/\.owl/, "")}", :value => after_hash}
    end

    def rdf_url_pattern
      /rdf\-syntax\-ns/
    end

    def type_value_for_rdf
      {:type => 'rdf', :value => after_hash}
    end

    # For '...#id' URLs the value is the text immediately before the '#'.
    def type_value_for_id_after_hash
      {:type => 'id', :value => match(text_before_hash_pattern)[1]}
    end

    def text_before_hash_pattern
      /([\w\-\._]*)\#/
    end

    def type_value_for_resource
      {:type => match(resource_url_pattern)[1], :value => match(last_element_of_url_pattern)[1]}
    end

    def ontology_url_pattern
      /data\.press\.net\/ontology\/(?:\w+\/)+(\w+)/
    end

    def type_value_for_ontology
      {:type => 'ontology', :value => match(ontology_url_pattern)[1]}
    end

    def dc_terms_pattern
      /\/dc\/terms\/([a-zA-Z_]+)/
    end

    def type_value_for_dc_terms
      {:type => 'dc:terms', :value => match(dc_terms_pattern)[1]}
    end

    def asset_pattern
      /\/ontologies\/asset\/([a-zA-Z_]+)/
    end

    def type_value_for_asset
      {:type => 'asset', :value => match(asset_pattern)[1]}
    end

    # No type or value can be determined, so only the URL is returned.
    def type_value_for_unknown_url
      {:url => get_url}
    end

    def last_element_of_url_pattern
      /\/([a-zA-Z_]+)\>/
    end

    # One of the resource identifiers followed by a final path element and '>'.
    def resource_url_pattern
      /(#{resource_identifiers.join('|')})\/[a-zA-Z_]+\>/
    end

  end

end
@@ -0,0 +1,95 @@
1
module TripleParser

  require_relative 'splitter'

  # Splitter for the colon-separated notation: type:value, prefix:suffix:'text'
  # and name(arg, arg) function calls.
  class ColonSeparatedSplitter < Splitter

    # Truthy when the string contains colon-separated text.
    def self.can_split?(string)
      colon_separated_rdf_pattern =~ string
    end

    def rdf_style
      'colon_separated'
    end

    private

    # Matches this:example, this:example:'with string' or this:function(example).
    def self.colon_separated_rdf_pattern
      pair = '[a-z\-]+:[\w-]+'                  # this:example
      quoted_suffix = %q{:(['"].+['"]|\w+)}     # :'this example'
      call_suffix = '\(.+\)'                    # (this, example)

      Regexp.new("#{pair}(#{quoted_suffix}|#{call_suffix})?")
    end

    # Picks the parser that suits the text; raises when none applies.
    def get_parts
      return get_parts_for_function if function_pattern =~ self
      return get_ontology if ontologies.include?(self)
      return get_double_colon_entry if double_colon_first_elements.include?(before_colon)
      return get_parts_for_type_value_pair if include?(':')

      raise "Unable to get parts from '#{self}'"
    end

    def function_pattern
      /^[\w_:]+\(.+\)/
    end

    # Stores the brewed arguments in @arguments and returns the function name as the value.
    def get_parts_for_function
      open_at = index('(')
      close_at = index(')')
      raw_arguments = self[open_at + 1..close_at - 1].split(/[\,\s]+/)
      @arguments = raw_arguments.collect { |argument| TMaker.brew(argument) }
      {:type => 'function', :value => self[0, open_at]}
    end

    def get_parts_for_type_value_pair
      {:type => before_colon, :value => after_colon}
    end

    # Text before the first colon, or nil when there is no colon.
    def before_colon
      return unless include?(':')

      @before_colon ||= self[0, index(':')]
    end

    # Text after the first colon, or nil when there is no colon.
    def after_colon
      return unless include?(':')

      @after_colon ||= self[index(':') + 1..length]
    end

    # Strips one leading and one trailing quote when the text is wrapped in quotes.
    def remove_bracketing_quotes(text)
      wrapped = /^'.+'$/ =~ text || /^".+"$/ =~ text
      wrapped ? text.gsub(/^['"]/, "").gsub(/['"]$/, "") : text
    end

    # Prefixes whose type is made of the first two colon-separated elements.
    def double_colon_first_elements
      %w{xml owl dc text}
    end

    # Type is "first:second"; value is everything after the second colon,
    # re-joined so that colons inside the value survive.
    def get_double_colon_entry
      pieces = split(':')
      remainder = pieces[2..pieces.length].join(':')
      {:type => "#{pieces[0]}:#{pieces[1]}", :value => remove_bracketing_quotes(remainder)}
    end

    def get_ontology
      {:type => 'ontology', :value => self}
    end

  end

end
@@ -0,0 +1,41 @@
1
module TripleParser

  # Splitter for quoted text tagged with a language/region, e.g. 'hello'@en.
  # The type becomes "text:<region>" and the value is the text with quote
  # characters removed.
  class RegionalTextSplitter < Splitter

    # Truthy when the string contains quoted text followed by '@' and an identifier.
    def self.can_split?(string)
      quoted_text_ampersand_and_language_identifier =~ string
    end

    def rdf_style
      'regional_text'
    end

    private

    # NOTE: `private` has no effect on singleton methods, so this stays public.
    def self.quoted_text_ampersand_and_language_identifier
      /('.*'|".*")\@[\w\-_]+/
    end

    def get_parts
      {
        :type => "text:#{region}",
        :value => text
      }
    end

    def region
      after_ampersand
    end

    # The text part with every single and double quote character removed.
    def text
      before_ampersand.gsub(/['"]/, "")
    end

    # Everything after the last '@' (the whole string when there is no '@').
    def after_ampersand
      rpartition('@').last
    end

    # Everything before the last '@', so that an '@' inside the quoted text
    # (e.g. 'me@example.com'@en) is kept as part of the text instead of
    # truncating it. Falls back to the whole string when there is no '@'.
    def before_ampersand
      head, separator, _tail = rpartition('@')
      separator.empty? ? String.new(self) : head
    end
  end
end
@@ -0,0 +1,12 @@
1
+
2
module TripleParser
  # Holds the site specific settings used when building URLs.
  module Settings
    # The application domain used in generated URLs. Defaults to
    # 'en.wikipedia.org/wiki/Triplestore' until a value is assigned.
    #
    # Stored in a module-level instance variable rather than a class variable,
    # so the value is not shared with anything that includes this module.
    def self.application_domain
      @application_url ||= 'en.wikipedia.org/wiki/Triplestore'
    end

    # Sets the application domain, e.g. 'undervale.co.uk'.
    def self.application_domain=(url)
      @application_url = url
    end
  end
end
@@ -0,0 +1,52 @@
1
+
2
module TripleParser
  # Base class for the splitters. A splitter is a String that knows how to
  # break itself into a hash of parts (:type, :value, :url). Subclasses must
  # define can_split?, rdf_style and get_parts.
  class Splitter < String

    # The readers for parts, rdf_style and url are defined explicitly below,
    # so only their writers are generated here (avoids redefining methods).
    attr_accessor :arguments
    attr_writer :parts, :rdf_style, :url

    # Subclasses return a truthy value when they are able to split the string.
    def self.can_split?(string)
      raise "Need to define test to determine if string can be converted using this class"
    end

    # The hash of parts, built once by get_parts and then cached.
    def parts
      @parts ||= get_parts
    end

    def type
      parts[:type]
    end

    def value
      parts[:value]
    end

    def url
      parts[:url]
    end

    # Subclasses return a string naming the notation they handle.
    def rdf_style
      raise "Need to define string that identifies rdf stype"
    end

    private

    # Subclasses return a hash that may contain :type, :value and :url.
    def get_parts
      raise "Need to define how to split text input into seperate parts"
    end

    # Converts camelCase text to snake_case, e.g. 'dateTime' => 'date_time'.
    # Works on a copy: the argument is never modified, so memoised or frozen
    # strings can be passed in safely.
    def underscore(text)
      text.gsub(/([a-z])([A-Z])/, '\1_\2').downcase
    end

    def resource_identifiers
      %w{resource domain}
    end

    def ontologies
      %w{about mentions}
    end

  end
end
@@ -0,0 +1,63 @@
1
module TripleParser
  # Builds a Third from a piece of text by handing the text to the first
  # splitter that reports it can split it.
  class TMaker
    require_relative 'third'
    require_relative 'bracketed_url_splitter'
    require_relative 'colon_separated_splitter'
    require_relative 'variable_splitter'
    require_relative 'unspecified_splitter'
    require_relative 'regional_text_splitter'

    attr_accessor :arguments, :rdf_style, :url

    # Convenience: builds a maker for the arguments and returns its Third.
    def self.brew(*args)
      new(*args).third
    end

    # Only the first argument (the text to convert) is used.
    def initialize(*args)
      @string = args.first
    end

    # The Third for the text. Any error raised while splitting results in a
    # Third whose rdf_style is 'unknown' and which carries no type or value.
    def third
      Third.new(
        @string,
        :type => split_text.type,
        :value => split_text.value,
        :url => split_text.url,
        :rdf_style => split_text.rdf_style,
        :arguments => split_text.arguments
      )
    rescue
      Third.new(@string, :rdf_style => 'unknown')
    end

    # The splitter instance chosen for the text (cached after the first call).
    def split_text
      @split_text ||= get_split_text
    end

    private

    # Instantiates the first splitter class able to split the text;
    # raises when no splitter accepts it.
    def get_split_text
      chosen = splitters.find { |candidate| candidate.can_split?(@string) }
      raise "Unable to get parts for third from: '#{@string}'" unless chosen

      chosen.new(@string)
    end

    # Splitter classes in the order they are tried.
    def splitters
      [
        BracketedUrlSplitter,
        ColonSeparatedSplitter,
        VariableSplitter,
        RegionalTextSplitter,
        UnspecifiedSplitter
      ]
    end

  end
end
@@ -0,0 +1,17 @@
1
module TripleParser

  # One element of a triple: the original text (this object is the String
  # itself) together with the details extracted from it.
  class Third < String

    attr_accessor :type, :value, :arguments, :rdf_style, :url

    # string - the original text of the element
    # args   - optional hash with :type, :value, :arguments, :rdf_style and :url
    def initialize(string, args = {})
      @type, @value, @arguments, @rdf_style, @url =
        args[:type], args[:value], args[:arguments], args[:rdf_style], args[:url]
      super(string)
    end

  end
end
@@ -0,0 +1,133 @@
1
+
2
module TripleParser
  # Converts a third (an object responding to type, value and arguments) into
  # its RDF text representation via to_s.
  class ToRdf

    # The only methods a third's type (or the prefix of a prefix:suffix type)
    # may be dispatched to. The type comes from parsed input, so dispatch is
    # restricted to this list rather than to every instance method: otherwise
    # a type such as 'to_s' would recurse without end and types such as
    # 'display' or 'inspect' would call unrelated methods.
    TYPE_CONVERTERS = %w{owl xml dc text id domain resource ontology asset function var rdf}.freeze

    def initialize(third)
      @third = third
    end

    def to_s
      get_output.to_s
    end

    protected

    # Thirds without a type are output unchanged.
    def get_output
      if third_type
        pass_to_type_method
      else
        unknown
      end
    end

    def third_type
      @third_type ||= @third.type if @third.type
    end

    # Dispatches on the prefix of a prefix:suffix type first, then on the whole
    # type, and falls back to "type:value" when no converter exists.
    def pass_to_type_method
      if type_if_known_colon_pair?
        send(colon_prefix)

      elsif method_exists_for?(third_type)
        send(third_type)

      else
        unknown_type
      end
    end

    def type_if_known_colon_pair?
      if colon_pair_pattern =~ third_type
        method_exists_for?(colon_prefix)
      end
    end

    # True only for the whitelisted converter names.
    def method_exists_for?(name)
      TYPE_CONVERTERS.include?(name.to_s)
    end

    # prefix:suffix
    def colon_pair_pattern
      /([a-zA-Z\-\_]+)\:([a-zA-Z\-\_]+)/
    end

    def colon_match
      @colon_match ||= colon_pair_pattern.match(third_type)
    end

    def colon_prefix
      @colon_prefix ||= colon_match[1]
    end

    def colon_suffix
      @colon_suffix ||= colon_match[2]
    end

    def owl
      "<http://purl.org/NET/c4dm/#{colon_suffix}.owl##{@third.value}>"
    end

    # The value as a quoted literal typed with the camel-cased suffix.
    def xml
      %Q{"#{@third.value}"^^<http://www.w3.org/2001/XMLSchema##{camelcase(colon_suffix)}>}
    end

    def dc
      "<http://purl.org/dc/#{colon_suffix}/#{@third.value}>"
    end

    def text
      %Q{"#{@third.value}"@#{colon_suffix}}
    end

    def unknown
      @third
    end

    def unknown_type
      "#{third_type}:#{@third.value}"
    end

    def id
      "<http://#{Settings.application_domain}/things/#{@third.value}#id>"
    end

    # NOTE(review): the third's value is not used here; the URL always ends
    # in 'name' - confirm this is intended.
    def domain
      "<http://#{Settings.application_domain}/ontologies/domain/name>"
    end

    def resource
      "<http://dbpedia.org/resource/#{@third.value}>"
    end

    def ontology
      "<http://data.press.net/ontology/tag/#{@third.value}>"
    end

    def asset
      "<http://#{Settings.application_domain}/ontologies/asset/#{@third.value}>"
    end

    # Each argument is converted with its own ToRdf and the results are
    # joined with spaces inside the brackets.
    def function
      arguments = @third.arguments.collect{|a| self.class.new(a)}
      "#{@third.value}(#{arguments.join(' ')})"
    end

    def var
      "?#{@third.value}"
    end

    def rdf
      "<http://www.w3.org/1999/02/22-rdf-syntax-ns##{@third.value}>"
    end

    # Converts snake_case text to camelCase, e.g. 'date_time' => 'dateTime'.
    # Returns a new string; the argument is left untouched.
    def camelcase(text)
      text.gsub(/_([a-z])/) { $1.upcase }
    end

  end
end
@@ -0,0 +1,123 @@
1
module TripleParser
  # Splits one line of text into the thirds of a triple: subject, predicate
  # and object. A line of the form "receiver function(args)" is treated as a
  # function and has only two parts.
  class TripleSet
    require_relative 't_maker'

    def initialize(triple)
      @triple = triple
    end

    # The brewed thirds for the line (cached after the first call).
    def parts
      @parts ||= get_parts
    end

    def subject
      @subject ||= parts[0]
    end

    def predicate
      @predicate ||= parts[1]
    end

    # The third part, or nil when it is missing or empty.
    def object
      @object ||= (!parts[2] || parts[2].empty?) ? nil : parts[2]
    end

    private

    # Matches the line against the splitting pattern and brews a third from
    # each captured group. Raises a descriptive error when the line cannot be
    # split at all, instead of failing on a nil match.
    def get_parts
      match = pattern_to_split_triple.match(@triple)
      raise "Unable to split triple: '#{@triple}'" unless match

      captures = [1, 2, 3].collect{|i| match[i] if i != @skip_triple_part}.compact
      captures.collect{|capture| TMaker.brew(capture)}
    end

    # For functions the third capture group is the inner group of the
    # arguments pattern rather than a part of the triple, so it is skipped.
    def pattern_to_split_triple
      if triple_is_function?
        @skip_triple_part = 3
        pattern_to_split_function
      else
        triple_spitting_pattern
      end
    end

    def triple_is_function?
      function_pattern =~ @triple
    end

    # Whole-line pattern: a variable or bracketed URL followed by name(arguments).
    def function_pattern
      Regexp.new([
        start_with_possible_white_space_pattern,
        start_variable_or_bracketed_url_pattern,
        receiving_variable_or_bracketed_url_pattern,
        function_name_pattern,
        function_arguments_pattern,
        closing_white_space_or_period_pattern,
      ].join)
    end

    # Captures the receiving element (group 1) and the function call (group 2).
    def pattern_to_split_function
      receiving_element_pattern = '[\w?:\/_\-#<>\.]+'

      Regexp.new(
        [
          '(',
          receiving_element_pattern,
          ')',
          spaces,
          '(',
          function_name_pattern,
          function_arguments_pattern,
          ')'
        ].join
      )
    end

    # Three groups separated by whitespace. Each group is either text that
    # contains a quoted section (which may include spaces) or a run of
    # non-space characters.
    def triple_spitting_pattern
      triple_containing_single_quoted_text = %q{\S*\'.*\'\S*}
      triple_containing_double_quoted_text = %q{\S*\".*\"\S*}
      text_not_split_by_spaces = '\S*'
      triple = [triple_containing_single_quoted_text, triple_containing_double_quoted_text, text_not_split_by_spaces].join('|')
      spaced_triples = Array.new(3, "(#{triple})").join('\s+')
      Regexp.new(spaced_triples)
    end

    def spaces
      '\s+'
    end

    def start_with_possible_white_space_pattern
      '^\s*'
    end

    def start_variable_or_bracketed_url_pattern
      '(\?|<http:\/\/)'
    end

    def receiving_variable_or_bracketed_url_pattern
      '[\w\/\-_#\.]+>?\s+'
    end

    def basic_text_pattern
      '\??[\w_\-:]+'
    end

    def function_name_pattern
      '[\w_\-:]+'
    end

    def function_arguments_pattern
      %q{\(([\w_\?:"']+[\s\,]*)+\)}
    end

    def closing_white_space_or_period_pattern
      '[\s\.]*$'
    end

    def standard_rdf_element_or_text_pattern
      '(?:<.*>|[\w\?\-:]+)'
    end

  end
end
@@ -0,0 +1,27 @@
1
module TripleParser

  require_relative 'splitter'

  # Fallback splitter for a single, possibly hyphenated, word.
  # No type, value or url is extracted from the text.
  class UnspecifiedSplitter < Splitter

    # Truthy when the string is one word (or blank) with optional surrounding whitespace.
    def self.can_split?(string)
      any_word_possibly_hyphenated =~ string
    end

    def rdf_style
      'unspecified'
    end

    private

    # NOTE: `private` has no effect on singleton methods, so this stays public.
    def self.any_word_possibly_hyphenated
      /^\s*[\w\-]*\s*$/
    end

    # Nothing can be determined about an unspecified word.
    def get_parts
      Hash.new
    end

  end

end
@@ -0,0 +1,30 @@
1
module TripleParser

  # Splitter for variables: a word starting with a question mark, e.g. ?name.
  class VariableSplitter < Splitter

    # Truthy when the string is a single ?word with optional surrounding whitespace.
    def self.can_split?(string)
      any_word_starting_with_question_mark =~ string
    end

    def rdf_style
      'variable'
    end

    private

    # NOTE: `private` has no effect on singleton methods, so this stays public.
    def self.any_word_starting_with_question_mark
      /^\s*\?[A-Za-z][\w\-]*\s*$/
    end

    def get_parts
      {
        :type => 'var',
        :value => variable_name
      }
    end

    # The name without the leading question mark. can_split? accepts
    # surrounding whitespace, so the text is stripped first; otherwise padded
    # input such as " ?name " would keep its question mark and trailing space.
    def variable_name
      strip.sub(/\A\?/, '')
    end

  end
end
@@ -0,0 +1,46 @@
1
+ require_relative 'triple_parser/t_maker'
2
+ require_relative 'triple_parser/triple_set'
3
+ require_relative 'triple_parser/to_rdf'
4
+ require_relative 'triple_parser/settings'
5
+
6
+
7
# Entry points for parsing text containing triples and converting it to RDF.
module TripleParser
  # Stores the input to be parsed: a String with one triple per line, or an
  # Array with one triple per element.
  def self.input(new_input)
    @input = new_input
  end

  # Builds a TripleSet for every triple in the stored input. Blank lines of a
  # String and nil elements of an Array are skipped.
  #
  # The input is matched with `case ... when String / when Array`, so
  # subclasses of String and Array are accepted as well.
  #
  # Raises a RuntimeError when the input is neither a String nor an Array.
  def self.triples
    @triples = []
    case @input
    when String
      @input.each_line do |triple|
        next if /^\s*$/ =~ triple
        @triples << TripleSet.new(triple)
      end
    when Array
      @input.compact.each do |triple|
        @triples << TripleSet.new(triple)
      end
    else
      raise "Input format not recognised"
    end

    @triples
  end

  # Converts the input into an array of strings, one per triple, each made of
  # the converted subject, predicate and object joined by spaces and ending
  # in " .".
  def self.to_rdf(input)
    @input = input
    triples.collect do |t|
      [
        get_rdf_for(t.subject),
        get_rdf_for(t.predicate),
        get_rdf_for(t.object)
      ].join(' ') + " ."
    end
  end

  # The RDF text for a third, or nil when the third is missing.
  def self.get_rdf_for(third)
    ToRdf.new(third).to_s if third
  end
end
metadata ADDED
@@ -0,0 +1,58 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: triple_parser
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.9
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Rob Nichols
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-08-07 00:00:00.000000000Z
13
+ dependencies: []
14
+ description: Triple Parser - Parses RDF triples and converts them into standard format
15
+ email: rob@undervale.co.uk
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - README.md
21
+ - lib/main.rb
22
+ - lib/triple_parser.rb
23
+ - lib/triple_parser/triple_set.rb
24
+ - lib/triple_parser/variable_splitter.rb
25
+ - lib/triple_parser/third.rb
26
+ - lib/triple_parser/t_maker.rb
27
+ - lib/triple_parser/colon_separated_splitter.rb
28
+ - lib/triple_parser/splitter.rb
29
+ - lib/triple_parser/regional_text_splitter.rb
30
+ - lib/triple_parser/to_rdf.rb
31
+ - lib/triple_parser/unspecified_splitter.rb
32
+ - lib/triple_parser/bracketed_url_splitter.rb
33
+ - lib/triple_parser/settings.rb
34
+ homepage: https://github.com/reggieb/triple_parser
35
+ licenses: []
36
+ post_install_message:
37
+ rdoc_options: []
38
+ require_paths:
39
+ - lib
40
+ required_ruby_version: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ required_rubygems_version: !ruby/object:Gem::Requirement
47
+ none: false
48
+ requirements:
49
+ - - ! '>='
50
+ - !ruby/object:Gem::Version
51
+ version: '0'
52
+ requirements: []
53
+ rubyforge_project:
54
+ rubygems_version: 1.8.10
55
+ signing_key:
56
+ specification_version: 3
57
+ summary: Triple Parser - Parses RDF triples and converts them into standard format
58
+ test_files: []