mida 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE.rdoc +21 -0
- data/README.rdoc +68 -0
- data/Rakefile +26 -0
- data/TODO.rdoc +6 -0
- data/lib/mida.rb +6 -0
- data/lib/mida/document.rb +61 -0
- data/lib/mida/item.rb +100 -0
- data/lib/mida/property.rb +70 -0
- data/spec/document_spec.rb +684 -0
- data/spec/item_spec.rb +393 -0
- data/spec/property_spec.rb +152 -0
- data/spec/spec_helper.rb +41 -0
- metadata +172 -0
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
require 'rspec'
|
2
|
+
|
3
|
+
# Teach a mocked element how to answer +attribute(attribute)+.
#
# element   - the mock element that will receive the expectation
# attribute - the attribute name the element will be queried with
# value     - falsy: the lookup returns nil (no attribute mock is built);
#             true:  the lookup returns an attribute mock with no +value+ stub
#                    (presumably for valueless attributes such as itemscope --
#                    confirm against the specs);
#             other: the lookup returns an attribute mock whose +value+
#                    returns the given object
#
# Returns the same element that was passed in.
def element_add_attribute(element, attribute, value)
  attr = nil
  if value
    attr = mock(Nokogiri::XML::Attr)
    # A literal +true+ only marks the attribute as present; it has no value.
    attr.stub!(:value).and_return(value) unless value == true
  end

  element.should_receive(:attribute).any_number_of_times.with(attribute).and_return(attr)
  element
end
|
13
|
+
|
14
|
+
# Return a mock Nokogiri::XML::Element
|
15
|
+
# Build a mock Nokogiri::XML::Element.
#
# tag           - value returned by the element's +name+
# attributes    - hash of attribute name => value, each handed to
#                 element_add_attribute; the microdata attributes
#                 (id, itemid, itemprop, itemref, itemscope, itemtype) that
#                 are not supplied are registered with a nil value.
#                 The caller's hash is not modified.
# inner_text    - value returned by the element's +inner_text+
# search_return - value returned by +search('./*')+
# id_searches   - hash of single-letter id => element; a search for
#                 "//*[@id='<id>']" returns that element wrapped in an array,
#                 or an empty array for letters without an entry
#
# Returns the configured mock element.
def mock_element(tag, attributes={}, inner_text=nil, search_return=[], id_searches={})
  element = mock(Nokogiri::XML::Element)

  # Work on a copy so the defaults added below never leak into the caller's
  # hash (and so a frozen hash can be passed in).
  attrs = attributes.dup
  %w[id itemid itemprop itemref itemscope itemtype].each do |name|
    attrs[name] = nil unless attrs.has_key?(name)
  end
  attrs.each do |name, value|
    element_add_attribute(element, name, value)
  end

  element.stub!(:inner_text).and_return(inner_text)
  element.stub!(:name).and_return(tag)

  element.should_receive(:search).any_number_of_times.with('./*').and_return(search_return)

  # Set a valid return element for each likely id
  ('a'..'z').each do |id|
    lookup = element.should_receive(:search).any_number_of_times.with("//*[@id='#{id}']")
    lookup.and_return(id_searches.has_key?(id) ? [id_searches[id]] : [])
  end

  element
end
|
metadata
ADDED
@@ -0,0 +1,172 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: mida
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
version: 0.0.0
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- Lawrence Woodman
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2011-04-12 00:00:00 +01:00
|
18
|
+
default_executable:
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
name: nokogiri
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
25
|
+
requirements:
|
26
|
+
- - ">="
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
segments:
|
29
|
+
- 0
|
30
|
+
version: "0"
|
31
|
+
type: :runtime
|
32
|
+
version_requirements: *id001
|
33
|
+
- !ruby/object:Gem::Dependency
|
34
|
+
name: rspec
|
35
|
+
prerelease: false
|
36
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
37
|
+
none: false
|
38
|
+
requirements:
|
39
|
+
- - ">="
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
segments:
|
42
|
+
- 0
|
43
|
+
version: "0"
|
44
|
+
type: :development
|
45
|
+
version_requirements: *id002
|
46
|
+
description: |
|
47
|
+
= Mida
|
48
|
+
|
49
|
+
* {Mida Project Page}[https://github.com/LawrenceWoodman/mida]
|
50
|
+
* {Mida Bug Tracker}[https://github.com/LawrenceWoodman/mida/issues]
|
51
|
+
|
52
|
+
== Description
|
53
|
+
A Microdata[http://en.wikipedia.org/wiki/Microdata_(HTML5)] parser and
|
54
|
+
extractor library for ruby.
|
55
|
+
This is based on the latest published version of the Microdata Specification
|
56
|
+
dated {5th April 2011}[http://www.w3.org/TR/2011/WD-microdata-20110405/].
|
57
|
+
|
58
|
+
== Installation
|
59
|
+
With Ruby and Rubygems:
|
60
|
+
gem install mida
|
61
|
+
|
62
|
+
=== Requirements:
|
63
|
+
|
64
|
+
* +Nokogiri+
|
65
|
+
|
66
|
+
== Usage
|
67
|
+
The following examples assume that you have required +mida+ and
|
68
|
+
+open-uri+.
|
69
|
+
|
70
|
+
=== Extracting Microdata from a page
|
71
|
+
All the Microdata is extracted from a page when a new <tt>Mida::Document</tt> instance
|
72
|
+
is created.
|
73
|
+
|
74
|
+
To extract all the Microdata from a webpage:
|
75
|
+
url = 'http://example.com'
|
76
|
+
doc = open(url) {|f| Mida::Document.new(f, url)}
|
77
|
+
|
78
|
+
The top-level +Items+ will be held in an array accessible via
|
79
|
+
<tt>doc.items</tt>.
|
80
|
+
|
81
|
+
To simply list all the top-level +Items+ that have been found:
|
82
|
+
puts doc.items
|
83
|
+
|
84
|
+
=== Searching
|
85
|
+
If you want to search for an +Item+ that has a specific +itemtype+/vocabulary
|
86
|
+
this can be done with the +search+ method.
|
87
|
+
|
88
|
+
To return all the +Items+ that use one of Google's Review vocabularies:
|
89
|
+
doc.search(%r{http://data-vocabulary\.org.*?review.*?}i)
|
90
|
+
|
91
|
+
=== Inspecting an +Item+
|
92
|
+
Each +Item+ is a <tt>Mida::Item</tt> instance and has three main methods of
|
93
|
+
interest, +type+, +properties+ and +id+.
|
94
|
+
|
95
|
+
To find out the +itemtype+ of the +Item+:
|
96
|
+
puts doc.items.first.type
|
97
|
+
|
98
|
+
To find out the +itemid+ of the +Item+:
|
99
|
+
puts doc.items.first.id
|
100
|
+
|
101
|
+
Properties are returned as a hash containing name/value pairs. The
|
102
|
+
values will be an array of either +String+ or <tt>Mida::Item</tt> instances.
|
103
|
+
|
104
|
+
To see the +properties+ of the +Item+:
|
105
|
+
puts doc.items.first.properties
|
106
|
+
|
107
|
+
== Bugs/Feature Requests
|
108
|
+
If you find a bug or want to make a feature request, please report it at the
|
109
|
+
Mida project's {issues tracker}[https://github.com/LawrenceWoodman/mida/issues]
|
110
|
+
on GitHub.
|
111
|
+
|
112
|
+
== License
|
113
|
+
Copyright (c) 2011 Lawrence Woodman.
|
114
|
+
This software is licensed under the MIT License. Please see the file, LICENSE.rdoc, for details.
|
115
|
+
|
116
|
+
email: lwoodman@vlifesystems.com
|
117
|
+
executables: []
|
118
|
+
|
119
|
+
extensions: []
|
120
|
+
|
121
|
+
extra_rdoc_files:
|
122
|
+
- README.rdoc
|
123
|
+
- LICENSE.rdoc
|
124
|
+
files:
|
125
|
+
- lib/mida.rb
|
126
|
+
- lib/mida/property.rb
|
127
|
+
- lib/mida/item.rb
|
128
|
+
- lib/mida/document.rb
|
129
|
+
- spec/property_spec.rb
|
130
|
+
- spec/document_spec.rb
|
131
|
+
- spec/item_spec.rb
|
132
|
+
- spec/spec_helper.rb
|
133
|
+
- TODO.rdoc
|
134
|
+
- README.rdoc
|
135
|
+
- LICENSE.rdoc
|
136
|
+
- Rakefile
|
137
|
+
has_rdoc: true
|
138
|
+
homepage: http://github.com/LawrenceWoodman/mida
|
139
|
+
licenses: []
|
140
|
+
|
141
|
+
post_install_message:
|
142
|
+
rdoc_options:
|
143
|
+
- --main
|
144
|
+
- README.rdoc
|
145
|
+
require_paths:
|
146
|
+
- lib
|
147
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
148
|
+
none: false
|
149
|
+
requirements:
|
150
|
+
- - ">="
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
segments:
|
153
|
+
- 1
|
154
|
+
- 9
|
155
|
+
version: "1.9"
|
156
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
157
|
+
none: false
|
158
|
+
requirements:
|
159
|
+
- - ">="
|
160
|
+
- !ruby/object:Gem::Version
|
161
|
+
segments:
|
162
|
+
- 0
|
163
|
+
version: "0"
|
164
|
+
requirements: []
|
165
|
+
|
166
|
+
rubyforge_project:
|
167
|
+
rubygems_version: 1.3.7
|
168
|
+
signing_key:
|
169
|
+
specification_version: 3
|
170
|
+
summary: A Microdata parser
|
171
|
+
test_files: []
|
172
|
+
|