mida 0.3.3 → 0.3.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +21 -0
- data/.rspec +2 -0
- data/.travis.yml +8 -0
- data/CHANGELOG.rdoc +10 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +33 -0
- data/LICENCE.rdoc +1 -1
- data/README.rdoc +4 -4
- data/Rakefile +4 -26
- data/bin/mida +17 -18
- data/lib/mida.rb +1 -1
- data/lib/mida/datatype/iso8601date.rb +2 -0
- data/lib/mida/version.rb +3 -0
- data/lib/mida/vocabularies/schemaorg.rb +1 -1
- data/mida.gemspec +26 -0
- data/resources/schema.org/generate.rb +191 -0
- data/resources/schema.org/vocabularies.json +5860 -0
- data/spec/datatype/generic_spec.rb +1 -1
- data/spec/spec_helper.rb +2 -3
- metadata +406 -360
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 6d01fe436dfe46a4496213212df9720593e13754
|
4
|
+
data.tar.gz: 5320742776ee4fb1ca0a171b76ccd72471681c54
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 4637578b6ff945acf8adc05a8d464502664870b422f13a2d7fc87c120471df8d46b7bf7232362aced30133db5bccc0162ea2ec0da9708896e99e6c46cc5d49cd
|
7
|
+
data.tar.gz: e2b442573d64d3d601c536d4b18bd9638247022b4962947c5bca8328f91c6266af409d77b8e0be0667f4c8e5ded59ba2830ad23a1c190a073d7d98cb6464fc38
|
data/.gitignore
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
# vi swap files
|
2
|
+
*.swp
|
3
|
+
|
4
|
+
# Gem pkg directory
|
5
|
+
pkg/
|
6
|
+
*.gem
|
7
|
+
*.rbc
|
8
|
+
.bundle
|
9
|
+
.config
|
10
|
+
.yardoc
|
11
|
+
InstalledFiles
|
12
|
+
_yardoc
|
13
|
+
coverage
|
14
|
+
doc/
|
15
|
+
lib/bundler/man
|
16
|
+
rdoc
|
17
|
+
spec/reports
|
18
|
+
test/tmp
|
19
|
+
test/version_tmp
|
20
|
+
tmp
|
21
|
+
.DS_Store
|
data/.rspec
ADDED
data/.travis.yml
ADDED
data/CHANGELOG.rdoc
CHANGED
@@ -1,3 +1,13 @@
|
|
1
|
+
== 0.3.6 (22nd September 2013)
|
2
|
+
* Corrects Gemfile.lock to work properly with bundler
|
3
|
+
|
4
|
+
== 0.3.5 (22nd September 2013)
|
5
|
+
* Corrects Gemfile.lock to work properly with bundler
|
6
|
+
|
7
|
+
== 0.3.4 (22nd September 2013)
|
8
|
+
* Ensure works on Ruby v1.9.3 and v2.0
|
9
|
+
* Display exception error message from parse_source()
|
10
|
+
|
1
11
|
== 0.3.3 (31st July 2011)
|
2
12
|
* Removed validation from +new+ for +Item+ and +Document+
|
3
13
|
* Created <tt>vocabularies/</tt> to put the vocabularies in
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
mida (0.3.6)
|
5
|
+
blankslate (= 2.1.2.4)
|
6
|
+
nokogiri (>= 1.5)
|
7
|
+
|
8
|
+
GEM
|
9
|
+
remote: https://rubygems.org/
|
10
|
+
specs:
|
11
|
+
blankslate (2.1.2.4)
|
12
|
+
diff-lcs (1.1.3)
|
13
|
+
mini_portile (0.5.1)
|
14
|
+
nokogiri (1.6.0)
|
15
|
+
mini_portile (~> 0.5.0)
|
16
|
+
rake (10.1.0)
|
17
|
+
rspec (2.10.0)
|
18
|
+
rspec-core (~> 2.10.0)
|
19
|
+
rspec-expectations (~> 2.10.0)
|
20
|
+
rspec-mocks (~> 2.10.0)
|
21
|
+
rspec-core (2.10.1)
|
22
|
+
rspec-expectations (2.10.0)
|
23
|
+
diff-lcs (~> 1.1.3)
|
24
|
+
rspec-mocks (2.10.1)
|
25
|
+
|
26
|
+
PLATFORMS
|
27
|
+
ruby
|
28
|
+
|
29
|
+
DEPENDENCIES
|
30
|
+
bundler (~> 1.3)
|
31
|
+
mida!
|
32
|
+
rake
|
33
|
+
rspec (~> 2.10.0)
|
data/LICENCE.rdoc
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
= The MIT Licence
|
2
2
|
|
3
|
-
Copyright (c) 2011 Lawrence Woodman
|
3
|
+
Copyright (c) 2011-2013 Lawrence Woodman <lwoodman@vlifesystems.com>
|
4
4
|
|
5
5
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
6
|
of this software and associated documentation files (the "Software"), to deal
|
data/README.rdoc
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
= Mida
|
2
2
|
|
3
|
-
* {Mida Project Page}[http://lawrencewoodman.github.com/mida]
|
3
|
+
* {Mida Project Page}[http://lawrencewoodman.github.io/mida]
|
4
4
|
* {Mida Github Repository}[https://github.com/LawrenceWoodman/mida]
|
5
5
|
* {Mida Bug Tracker}[https://github.com/LawrenceWoodman/mida/issues]
|
6
6
|
|
@@ -22,11 +22,11 @@ Mida keeps RubyGems[http://rubygems.org/gems/mida] up-to-date with its latest ve
|
|
22
22
|
|
23
23
|
To use the command line tool, supply it with the urls or filenames that you
|
24
24
|
would like to be parsed (by default each item is output as yaml):
|
25
|
-
mida http://lawrencewoodman.github.com/mida/news/
|
25
|
+
mida http://lawrencewoodman.github.io/mida/news/
|
26
26
|
|
27
27
|
If you want to search for specific types you can use the <tt>-t</tt> switch
|
28
28
|
followed by a Regular Expression:
|
29
|
-
mida -t /person/i http://lawrencewoodman.github.com/mida/news/
|
29
|
+
mida -t /person/i http://lawrencewoodman.github.io/mida/news/
|
30
30
|
|
31
31
|
For more information look at <tt>mida</tt>'s help:
|
32
32
|
mida -h
|
@@ -132,5 +132,5 @@ Mida project's {issues tracker}[https://github.com/LawrenceWoodman/mida/issues]
|
|
132
132
|
on github.
|
133
133
|
|
134
134
|
== Licence
|
135
|
-
Copyright (c) 2011 Lawrence Woodman.
|
135
|
+
Copyright (c) 2011-2013 Lawrence Woodman <lwoodman@vlifesystems.com>.
|
136
136
|
This software is licensed under the MIT Licence. Please see the file, LICENCE.rdoc, for details.
|
data/Rakefile
CHANGED
@@ -1,28 +1,6 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
desc "Create Gem"
|
4
|
-
require 'rubygems/package_task'
|
5
|
-
spec = Gem::Specification.new do |s|
|
6
|
-
s.name = "mida"
|
7
|
-
s.summary = "A Microdata parser/extractor library"
|
8
|
-
s.description = "A Microdata parser and extractor library which includes support for the schema.org vocabularies"
|
9
|
-
s.version = "0.3.3"
|
10
|
-
s.author = "Lawrence Woodman"
|
11
|
-
s.email = "lwoodman@vlifesystems.com"
|
12
|
-
s.homepage = %q{http://lawrencewoodman.github.com/mida/}
|
13
|
-
s.platform = Gem::Platform::RUBY
|
14
|
-
s.required_ruby_version = '>=1.9'
|
15
|
-
s.files = Dir['lib/**/*.rb'] + Dir['spec/**/*.rb'] + Dir['*.rdoc'] + Dir['Rakefile']
|
16
|
-
s.executables = ['mida']
|
17
|
-
s.extra_rdoc_files = ['README.rdoc', 'LICENCE.rdoc', 'CHANGELOG.rdoc']
|
18
|
-
s.rdoc_options << '--main' << 'README.rdoc'
|
19
|
-
s.add_dependency('nokogiri', '>= 1.5')
|
20
|
-
s.add_dependency('blankslate')
|
21
|
-
s.add_development_dependency('rspec', '>= 2.0' )
|
22
|
-
s.add_development_dependency('bundler')
|
23
|
-
end
|
24
|
-
Gem::PackageTask.new(spec).define
|
1
|
+
require "bundler/gem_tasks"
|
2
|
+
require "rspec/core/rake_task"
|
25
3
|
|
26
|
-
desc "Run Specs"
|
27
|
-
require 'rspec/core/rake_task'
|
28
4
|
RSpec::Core::RakeTask.new(:spec)
|
5
|
+
|
6
|
+
task :default => :spec
|
data/bin/mida
CHANGED
@@ -92,29 +92,28 @@ end
|
|
92
92
|
|
93
93
|
def parse_source(source, options)
|
94
94
|
url = get_url
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
end
|
95
|
+
open(source) do |f|
|
96
|
+
doc = Mida::Document.new(f, url)
|
97
|
+
items = if options[:type]
|
98
|
+
doc.search(options[:type])
|
99
|
+
else
|
100
|
+
doc.items
|
101
|
+
end
|
103
102
|
|
104
|
-
|
105
|
-
|
103
|
+
if items.empty?
|
104
|
+
puts "No microdata found in this document."; exit
|
105
|
+
else
|
106
|
+
if options.include?(:count)
|
107
|
+
display_count(items)
|
106
108
|
else
|
107
|
-
|
108
|
-
display_count(items)
|
109
|
-
else
|
110
|
-
display_items(items)
|
111
|
-
end
|
109
|
+
display_items(items)
|
112
110
|
end
|
113
111
|
end
|
114
|
-
rescue
|
115
|
-
puts "Failed to parse: #{source}"
|
116
|
-
exit
|
117
112
|
end
|
113
|
+
rescue => e
|
114
|
+
puts "Failed to parse: #{source}"
|
115
|
+
puts "Error: #{e.to_s}"
|
116
|
+
exit
|
118
117
|
end
|
119
118
|
|
120
119
|
ARGV.each do |source|
|
data/lib/mida.rb
CHANGED
data/lib/mida/version.rb
ADDED
@@ -1 +1 @@
|
|
1
|
-
Dir.glob(File.dirname(__FILE__) + '/schemaorg/*.rb') {|file| require file}
|
1
|
+
# Require every schema.org vocabulary file, last glob result first.
# Array#reverse does not take a block, so the iteration has to go through
# reverse_each; otherwise the block is silently dropped and nothing is
# required.
Dir.glob(File.dirname(__FILE__) + '/schemaorg/*.rb').reverse_each {|file| require file}
|
data/mida.gemspec
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# Put this gem's lib/ directory on the load path so that mida/version can
# be required straight from the source tree.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'mida/version'

Gem::Specification.new do |gem|
  # Identity and descriptive metadata
  gem.name        = "mida"
  gem.version     = Mida::VERSION
  gem.license     = "MIT"
  gem.author      = "Lawrence Woodman"
  gem.email       = "lwoodman@vlifesystems.com"
  gem.homepage    = %q{http://lawrencewoodman.github.io/mida/}
  gem.summary     = "A Microdata parser/extractor library"
  gem.description = "A Microdata parser and extractor library which includes support for the schema.org vocabularies"

  # Packaged files: whatever git tracks; test files are the subset under
  # test/, spec/ or features/.
  gem.files         = `git ls-files`.split($/)
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.executables   = ['mida']
  gem.require_paths = ["lib"]

  # RDoc configuration
  gem.extra_rdoc_files = ['README.rdoc', 'LICENCE.rdoc', 'CHANGELOG.rdoc']
  gem.rdoc_options.concat(['--main', 'README.rdoc'])

  # Runtime dependencies
  gem.add_dependency 'blankslate', '2.1.2.4'
  gem.add_dependency 'nokogiri', '>= 1.5'

  # Development dependencies
  gem.add_development_dependency "bundler", "~> 1.3"
  gem.add_development_dependency "rake"
  gem.add_development_dependency "rspec", "~> 2.10.0"
end
|
@@ -0,0 +1,191 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# Creates the vocabulary and enumeration classes for schema.org from
|
3
|
+
# vocabularies.json which is held at:
|
4
|
+
# https://github.com/LawrenceWoodman/schema.org_schemas
|
5
|
+
# The classes are created in vocabularies/
|
6
|
+
require 'erb'
|
7
|
+
require 'json'
|
8
|
+
require 'set'
|
9
|
+
|
10
|
+
ENUMERATION_TEMPLATE = <<-EOB
|
11
|
+
require 'mida/datatype'
|
12
|
+
|
13
|
+
module Mida
|
14
|
+
module SchemaOrg
|
15
|
+
|
16
|
+
# <%= type.description %>
|
17
|
+
class <%= type.name %> < Mida::DataType::Enumeration
|
18
|
+
VALID_VALUES = [
|
19
|
+
% num_instances = type.instances.size
|
20
|
+
% type.instances.first(num_instances-1).each do |instance|
|
21
|
+
[::Mida::DataType::URL, %r{http://schema.org/<%= instance %>}i],
|
22
|
+
% end
|
23
|
+
[::Mida::DataType::URL, %r{http://schema.org/<%= type.instances.last %>}i]
|
24
|
+
]
|
25
|
+
end
|
26
|
+
|
27
|
+
end
|
28
|
+
end
|
29
|
+
EOB
|
30
|
+
|
31
|
+
VOCABULARY_TEMPLATE = <<-EOB
|
32
|
+
require 'mida/vocabulary'
|
33
|
+
|
34
|
+
module Mida
|
35
|
+
module SchemaOrg
|
36
|
+
|
37
|
+
% type.types_used.each do |klass|
|
38
|
+
autoload :<%= klass %>, 'mida/vocabularies/schemaorg/<%= klass.downcase %>'
|
39
|
+
% end
|
40
|
+
|
41
|
+
# <%= type.description %>
|
42
|
+
class <%= type.name %> < Mida::Vocabulary
|
43
|
+
itemtype %r{http://schema.org/<%= type.name %>}i
|
44
|
+
% type.vocabularies.each do |include_vocabulary|
|
45
|
+
include_vocabulary Mida::SchemaOrg::<%= include_vocabulary %>
|
46
|
+
% end
|
47
|
+
% type.properties.each do |property|
|
48
|
+
|
49
|
+
# <%= property.description %>
|
50
|
+
% if property.types.size == 1 && property.types[0].name == 'Text'
|
51
|
+
has_many '<%= property.name %>'
|
52
|
+
% else
|
53
|
+
has_many '<%= property.name %>' do
|
54
|
+
% property.types.each do |prop_type|
|
55
|
+
extract <%= prop_type.full_name %>
|
56
|
+
% end
|
57
|
+
end
|
58
|
+
% end
|
59
|
+
% end
|
60
|
+
end
|
61
|
+
|
62
|
+
end
|
63
|
+
end
|
64
|
+
EOB
|
65
|
+
|
66
|
+
# Datatype names paired with the Mida::DataType class that handles each.
# Every entry is a Hash with a 'name' key and a 'full_name' key.
DATATYPES = [
  %w[Boolean Boolean],
  %w[Date    ISO8601Date],
  %w[Float   Float],
  %w[Integer Integer],
  %w[Number  Number],
  %w[URL     URL],
  %w[Text    Text]
].map do |name, class_name|
  {'name' => name, 'full_name' => "Mida::DataType::#{class_name}"}
end
|
75
|
+
|
76
|
+
# A single property of a type: its name, its description and the list of
# Type objects that its values may take.
class Property

  attr_reader :name, :description, :types

  # name        - the property's name
  # description - the property's description
  # types       - names of the types the property accepts, or nil for none;
  #               each name is resolved through Type.find and the resolved
  #               list is sorted
  def initialize(name, description, types)
    @name = name
    @description = description
    type_names = types || []
    resolved = type_names.map { |type_name| Type.find(type_name) }
    @types = resolved.sort
    add_text_type(@types)
  end

  # Appends the 'Text' type to @types when the given list contains at least
  # one vocabulary type and does not already contain a type named 'Text'.
  def add_text_type(types)
    return unless types.any? { |candidate| candidate.vocabulary? }
    return if types.any? { |candidate| candidate.name == 'Text' }

    @types << Type.find('Text')
  end

end
|
95
|
+
|
96
|
+
# One type built from a definition hash: a vocabulary, an enumeration or a
# datatype. Every instance is recorded in a class-wide registry so it can
# later be looked up by name with Type.find.
class Type
  include Comparable

  # Matches the full name of the Text datatype, which is ordered after
  # every other type by <=>.
  TEXT_FULL_NAME = /^Mida::DataType::Text/

  # Every Type created so far, in creation order. Initialised here so that
  # Type.find is safe to call before the first Type exists.
  @@types = []

  attr_reader :name, :full_name, :description, :vocabularies, :properties
  attr_reader :instances

  # definition - Hash with string keys. 'name' and 'description' are read
  #              as given; 'ancestors', 'vocabularies', 'properties' and
  #              'instances' default to []; 'full_name' defaults to the
  #              name inside the Mida::SchemaOrg namespace.
  def initialize(definition)
    @name = definition['name']
    @ancestors = definition['ancestors'] || []
    @description = definition['description']
    @vocabularies = definition['vocabularies'] || []
    @properties = definition['properties'] || []
    @instances = definition['instances'] || []
    @full_name = definition['full_name'] || "Mida::SchemaOrg::#{@name}"
    @@types << self
  end

  # Returns the first registered type with the given name, or nil if not
  # found.
  def self.find(name)
    @@types.find {|type| type.name == name}
  end

  # Replaces the raw property hashes taken from the definition with
  # Property objects built from their 'name', 'description' and 'types'.
  def process_properties
    @properties = @properties.collect do |property|
      Property.new(property['name'], property['description'], property['types'])
    end
  end

  # Returns a Set of type names this type refers to: the names listed in
  # its vocabularies plus the name of every non-datatype type used by its
  # properties. Expects process_properties to have been called if the type
  # has properties, since it reads each property's types.
  def types_used
    types = Set.new(@vocabularies)
    @properties.each do |property|
      property.types.each do |type|
        types << type.name unless type.datatype?
      end
    end
    types
  end

  # Returns an Array with the require path of every type in types_used.
  #
  # vocabulary - ignored; kept (now optional) so that existing one-argument
  #              calls keep working. It used to be forwarded to types_used,
  #              which takes no arguments, so every call raised.
  def get_requires(vocabulary = nil)
    # types_used yields type names (Strings), not Type objects.
    types_used.collect do |type_name|
      "mida/vocabularies/schemaorg/#{type_name.downcase}"
    end
  end

  # True if 'Enumeration' is among the type's ancestors.
  def enumeration?
    @ancestors.include?('Enumeration')
  end

  # Truthy (the match index) if the full name is in the Mida::DataType
  # namespace, otherwise nil.
  def datatype?
    full_name =~ /^Mida::DataType::/
  end

  # True if the type is neither an enumeration nor a datatype.
  def vocabulary?
    !enumeration? && !datatype?
  end

  # Orders types by full name, except that the Text datatype sorts after
  # every other type. Two Text types compare by full name like any other
  # pair.
  def <=>(other)
    # `!~` is the negated match. The former `!=~` parsed as `!= (~regexp)`,
    # which made a Text type compare as greater than another Text type.
    text = !(full_name !~ TEXT_FULL_NAME)
    other_text = !(other.full_name !~ TEXT_FULL_NAME)

    if text && !other_text
      1
    elsif other_text && !text
      -1
    else
      full_name <=> other.full_name
    end
  end
end
|
172
|
+
|
173
|
+
# Build Type objects for everything in vocabularies.json, then for the
# datatypes. The datatype objects are created only so that they are
# registered and can be found by name when properties are processed.
definitions = JSON.parse(File.read('vocabularies.json'))
types = definitions.map {|definition| Type.new(definition)}
datatypes = DATATYPES.map {|definition| Type.new(definition)}
types.each(&:process_properties)

# Make sure both output directories exist.
%w[enumerations vocabularies].each do |output_dir|
  Dir.mkdir(output_dir) unless File.directory?(output_dir)
end

# Write one Ruby file per enumeration or vocabulary type; types that are
# neither are skipped. The block variable has to stay named `type` because
# the ERB templates read it through `binding`.
types.each do |type|
  template, output_dir =
    if type.enumeration?
      [ENUMERATION_TEMPLATE, 'enumerations']
    elsif type.vocabulary?
      [VOCABULARY_TEMPLATE, 'vocabularies']
    end
  next unless template

  File.open("#{output_dir}/#{type.name.downcase}.rb", 'w') do |file|
    file.puts ERB.new(template, 0, '%').result(binding)
  end
end
|