mida_vocabulary 0.1 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +6 -0
- data/.rspec +1 -0
- data/.travis.yml +4 -0
- data/Gemfile +3 -0
- data/Gemfile.lock +29 -0
- data/mida_vocabulary.gemspec +17 -0
- data/resources/schema.org/generate.rb +191 -0
- data/resources/schema.org/vocabularies.json +6329 -0
- metadata +21 -13
data/.gitignore
ADDED
data/.rspec
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# just let autotest know about rspec
|
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
PATH
|
|
2
|
+
remote: .
|
|
3
|
+
specs:
|
|
4
|
+
mida_vocabulary (0.1)
|
|
5
|
+
blankslate
|
|
6
|
+
|
|
7
|
+
GEM
|
|
8
|
+
remote: http://rubygems.org/
|
|
9
|
+
specs:
|
|
10
|
+
blankslate (2.1.2.4)
|
|
11
|
+
diff-lcs (1.1.3)
|
|
12
|
+
rake (0.9.2.2)
|
|
13
|
+
rspec (2.7.0)
|
|
14
|
+
rspec-core (~> 2.7.0)
|
|
15
|
+
rspec-expectations (~> 2.7.0)
|
|
16
|
+
rspec-mocks (~> 2.7.0)
|
|
17
|
+
rspec-core (2.7.1)
|
|
18
|
+
rspec-expectations (2.7.0)
|
|
19
|
+
diff-lcs (~> 1.1.2)
|
|
20
|
+
rspec-mocks (2.7.0)
|
|
21
|
+
|
|
22
|
+
PLATFORMS
|
|
23
|
+
ruby
|
|
24
|
+
|
|
25
|
+
DEPENDENCIES
|
|
26
|
+
bundler
|
|
27
|
+
mida_vocabulary!
|
|
28
|
+
rake
|
|
29
|
+
rspec (~> 2.7.0)
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# Gem specification for mida_vocabulary (schema.org microdata vocabularies
# for the mida parser/extractor library).
Gem::Specification.new do |spec|
  # Identity and target platform.
  spec.name     = 'mida_vocabulary'
  spec.summary  = 'Microdata vocabularies for mida parser/extractor library'
  spec.description = 'Microdata schema.org vocabularies'
  spec.version  = '0.1.1'
  spec.author   = 'Pavel Evstigneev'
  spec.email    = 'pavel.evst@gmail.com'
  spec.homepage = 'http://github.com/Paxa/mida_vocabulary'
  spec.platform = Gem::Platform::RUBY
  spec.required_ruby_version = '>=1.9'

  # Package every file tracked by git; the gem ships no executables.
  tracked_files = `git ls-files`.split("\n")
  spec.files            = tracked_files
  spec.executables      = []
  spec.extra_rdoc_files = ['README.md']

  # Runtime dependency first, then the development-only ones.
  spec.add_dependency('blankslate')
  spec.add_development_dependency('rspec', '~> 2.7.0')
  spec.add_development_dependency('bundler')
end
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# Creates the vocabulary and enumeration classes for schema.org from
|
|
3
|
+
# vocabularies.json which is held at:
|
|
4
|
+
# https://github.com/LawrenceWoodman/schema.org_schemas
|
|
5
|
+
# The classes are created in vocabularies/ and enumerations/
|
|
6
|
+
require 'erb'
|
|
7
|
+
require 'json'
|
|
8
|
+
require 'set'
|
|
9
|
+
|
|
10
|
+
# ERB template for one generated Mida::DataType::Enumeration subclass.
# It is rendered with trim mode '%' (lines starting with % are Ruby code) and
# reads a local named +type+ from the binding, using its +description+,
# +name+ and +instances+.
#
# The description is collapsed onto a single line so that a multi-line
# description cannot escape the generated comment. Each instance becomes one
# VALID_VALUES entry, comma-separated; a type without instances yields an
# empty array instead of failing.
ENUMERATION_TEMPLATE = <<-EOB
require 'mida_vocabulary/datatype'

module Mida
  module SchemaOrg

    # <%= type.description.to_s.lines.map(&:strip).join(' ') %>
    class <%= type.name %> < Mida::DataType::Enumeration
      VALID_VALUES = [
% type.instances.each_with_index do |instance, index|
        [::Mida::DataType::URL, %r{http://schema.org/<%= instance %>}i]<%= ',' if index < type.instances.size - 1 %>
% end
      ]
    end

  end
end
EOB
|
|
30
|
+
|
|
31
|
+
# ERB template for one generated Mida::Vocabulary subclass.
# It is rendered with trim mode '%' (lines starting with % are Ruby code) and
# reads a local named +type+ from the binding, using its +types_used+,
# +description+, +name+, +vocabularies+ and +properties+.
#
# Type and property descriptions are collapsed onto a single line so that a
# multi-line description cannot escape the generated comment.
# A property whose only type is Text is emitted as a bare has_many; any other
# property gets a block with one extract line per type.
VOCABULARY_TEMPLATE = <<-EOB
require 'mida_vocabulary/vocabulary'

module Mida
  module SchemaOrg

% type.types_used.each do |klass|
    autoload :<%= klass %>, 'mida_vocabulary/vocabularies/schemaorg/<%= klass.downcase %>'
% end

    # <%= type.description.to_s.lines.map(&:strip).join(' ') %>
    class <%= type.name %> < Mida::Vocabulary
      itemtype %r{http://schema.org/<%= type.name %>}i
% type.vocabularies.each do |include_vocabulary|
      include_vocabulary Mida::SchemaOrg::<%= include_vocabulary %>
% end
% type.properties.each do |property|

      # <%= property.description.to_s.lines.map(&:strip).join(' ') %>
%   if property.types.size == 1 && property.types[0].name == 'Text'
      has_many '<%= property.name %>'
%   else
      has_many '<%= property.name %>' do
%     property.types.each do |prop_type|
        extract <%= prop_type.full_name %>
%     end
      end
%   end
% end
    end

  end
end
EOB
|
|
65
|
+
|
|
66
|
+
# Definitions of the primitive datatypes, in the same hash shape that
# Type#initialize reads: 'name' is the type name used in vocabularies.json
# property type lists, 'full_name' the Mida::DataType constant emitted into
# generated code.
# The table and its entries are frozen so they cannot be modified by accident.
DATATYPES = [
  {'name' => 'Boolean', 'full_name' => 'Mida::DataType::Boolean'},
  {'name' => 'Date', 'full_name' => 'Mida::DataType::ISO8601Date'},
  {'name' => 'Float', 'full_name' => 'Mida::DataType::Float'},
  {'name' => 'Integer', 'full_name' => 'Mida::DataType::Integer'},
  {'name' => 'Number', 'full_name' => 'Mida::DataType::Number'},
  {'name' => 'URL', 'full_name' => 'Mida::DataType::URL'},
  {'name' => 'Text', 'full_name' => 'Mida::DataType::Text'},
].map(&:freeze).freeze
|
|
75
|
+
|
|
76
|
+
# A single property of a type, with the types its values may take.
class Property

  attr_reader :name, :description, :types

  # name        - the property name
  # description - the property description
  # types       - Array of type names, or nil for none; each name is
  #               resolved through Type.find and the result is sorted
  #
  # Raises ArgumentError when a type name is not known to Type.find, rather
  # than failing later with an error about nil.
  def initialize(name, description, types)
    @name = name
    @description = description
    @types = (types || []).collect { |type_name| find_type(type_name) }.sort
    add_text_type(@types)
  end

  # Appends the Text type to this property's types when +types+ contains at
  # least one vocabulary type and no type named 'Text'.
  def add_text_type(types)
    return unless types.any? { |type| type.vocabulary? }
    return if types.any? { |type| type.name == 'Text' }

    @types << find_type('Text')
  end

  private

  # Looks up +type_name+ with Type.find, raising ArgumentError if it is unknown.
  def find_type(type_name)
    found = Type.find(type_name)
    unless found
      raise ArgumentError,
            "Unknown type #{type_name.inspect} for property #{@name.inspect}"
    end
    found
  end

end
|
|
95
|
+
|
|
96
|
+
# A type read from a definition hash: either an entry of vocabularies.json
# or one of the DATATYPES entries. Every instance registers itself so that
# it can later be looked up by name with Type.find.
#
# Types sort by full_name, except that the Text datatype always sorts last.
class Type
  include Comparable

  TEXT_FULL_NAME = /\AMida::DataType::Text/
  DATATYPE_FULL_NAME = /\AMida::DataType::/

  # All instances created so far, in creation order.
  @registry = []

  class << self
    # Returns the first registered type called +name+, or nil if not found.
    def find(name)
      @registry.find { |type| type.name == name }
    end

    # Adds +type+ to the registry searched by Type.find.
    def register(type)
      @registry << type
    end
  end

  attr_reader :name, :full_name, :description, :vocabularies, :properties
  attr_reader :instances

  # definition - Hash with string keys. 'name' is expected; 'ancestors',
  #              'description', 'vocabularies', 'properties', 'instances'
  #              and 'full_name' are optional. A missing 'full_name'
  #              defaults to "Mida::SchemaOrg::<name>".
  def initialize(definition)
    @name = definition['name']
    @ancestors = definition['ancestors'] || []
    @description = definition['description']
    @vocabularies = definition['vocabularies'] || []
    @properties = definition['properties'] || []
    @instances = definition['instances'] || []
    @full_name = definition['full_name'] || "Mida::SchemaOrg::#{@name}"
    Type.register(self)
  end

  # Replaces the raw property hashes with Property objects. Property resolves
  # type names through Type.find, so this must run after every type has been
  # created. It is meant to be called once per type.
  def process_properties
    @properties = @properties.collect do |property|
      Property.new(property['name'], property['description'], property['types'])
    end
  end

  # Returns a Set of type names this type refers to: its included
  # vocabularies followed by every non-datatype type used by its properties.
  # Requires process_properties to have been called.
  def types_used
    used = Set.new(@vocabularies)
    @properties.each do |property|
      property.types.each do |type|
        used << type.name unless type.datatype?
      end
    end
    used
  end

  # Returns the require paths of the files for the types in types_used.
  # The +vocabulary+ argument is ignored; it is kept (now optional) so that
  # existing one-argument calls keep working.
  def get_requires(vocabulary = nil)
    types_used.collect do |type_name|
      "mida_vocabulary/vocabularies/schemaorg/#{type_name.downcase}"
    end
  end

  # True when 'Enumeration' is among this type's ancestors.
  def enumeration?
    @ancestors.include?('Enumeration')
  end

  # True when full_name lives in the Mida::DataType namespace.
  def datatype?
    !!(full_name =~ DATATYPE_FULL_NAME)
  end

  # True for anything that is neither an enumeration nor a datatype.
  def vocabulary?
    !enumeration? && !datatype?
  end

  # Orders types by full_name with the Text datatype last.
  # Returns nil when +other+ is not a Type.
  def <=>(other)
    return nil unless other.is_a?(Type)

    [text_rank, full_name] <=> [other.text_rank, other.full_name]
  end

  protected

  # Sort key component: 1 for the Text datatype, 0 for everything else.
  def text_rank
    full_name =~ TEXT_FULL_NAME ? 1 : 0
  end
end
|
|
172
|
+
|
|
173
|
+
# Renders +template+ with ERB in '%' trim mode, exposing +type+ to the
# template as a local variable, and returns the resulting String.
# The keyword form of ERB.new is used where the installed ERB supports it;
# older ERB versions only accept the positional (safe_level, trim_mode) form.
def render_template(template, type)
  erb = if ERB.instance_method(:initialize).parameters.include?([:key, :trim_mode])
          ERB.new(template, trim_mode: '%')
        else
          ERB.new(template, 0, '%')
        end
  erb.result(binding)
end

# Load the type definitions from the current directory.
definitions = JSON.parse(File.read('vocabularies.json'))
types = definitions.collect { |definition| Type.new(definition) }

# Create the datatypes only so they are registered for Type.find; no files
# are generated for them.
DATATYPES.each { |definition| Type.new(definition) }

# Properties resolve type names via Type.find, so process them only after
# every type has been created.
types.each { |type| type.process_properties }

%w[enumerations vocabularies].each do |directory|
  Dir.mkdir(directory) unless File.directory?(directory)
end

# Write one file per enumeration and per vocabulary.
types.each do |type|
  if type.enumeration?
    File.open("enumerations/#{type.name.downcase}.rb", 'w') do |file|
      file.puts render_template(ENUMERATION_TEMPLATE, type)
    end
  elsif type.vocabulary?
    File.open("vocabularies/#{type.name.downcase}.rb", 'w') do |file|
      file.puts render_template(VOCABULARY_TEMPLATE, type)
    end
  end
end
|