mida 0.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE.rdoc +21 -0
- data/README.rdoc +68 -0
- data/Rakefile +26 -0
- data/TODO.rdoc +6 -0
- data/lib/mida.rb +6 -0
- data/lib/mida/document.rb +61 -0
- data/lib/mida/item.rb +100 -0
- data/lib/mida/property.rb +70 -0
- data/spec/document_spec.rb +684 -0
- data/spec/item_spec.rb +393 -0
- data/spec/property_spec.rb +152 -0
- data/spec/spec_helper.rb +41 -0
- metadata +172 -0
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
require 'rspec'
|
2
|
+
|
3
|
+
# Sets up +element+ so that <tt>element.attribute(attribute)</tt> may be
# called any number of times and answers according to +value+.
#
# element   - the mock element the expectation is added to
# attribute - the attribute name the +attribute+ call is expected with
# value     - +nil+ or +false+: the lookup returns +nil+ (attribute absent);
#             +true+: the lookup returns a mock Nokogiri::XML::Attr whose
#             +value+ is left unstubbed;
#             anything else: the lookup returns a mock Nokogiri::XML::Attr
#             whose +value+ returns it
#
# Returns +element+.
def element_add_attribute(element, attribute, value)
  attr = nil
  if value
    attr = mock(Nokogiri::XML::Attr)
    # +true+ is a sentinel for "present, but no value to report", so only
    # stub +value+ for everything else.
    attr.stub!(:value).and_return(value) unless value == true
  end

  element.should_receive(:attribute).any_number_of_times.with(attribute).and_return(attr)
  element
end
|
13
|
+
|
14
|
+
# Return a mock Nokogiri::XML::Element
#
# tag           - value returned by the element's +name+
# attributes    - hash of attribute name => value; each pair is handed to
#                 element_add_attribute. 'id', 'itemid', 'itemprop',
#                 'itemref', 'itemscope' and 'itemtype' are added with a
#                 +nil+ value when missing. The hash passed in is not
#                 modified.
# inner_text    - value returned by the element's +inner_text+
# search_return - value returned by <tt>search('./*')</tt>
# id_searches   - hash of id => element; for each id from 'a' to 'z' an
#                 id search returns the listed element wrapped in an array,
#                 or an empty array when the id is not listed
def mock_element(tag, attributes={}, inner_text=nil, search_return=[], id_searches={})
  element = mock(Nokogiri::XML::Element)

  # Work on a copy so the defaults below never leak into the caller's hash
  # (and so a frozen hash can be passed in).
  all_attributes = attributes.dup
  ['id', 'itemid', 'itemprop', 'itemref', 'itemscope', 'itemtype'].each do |name|
    all_attributes[name] = nil unless all_attributes.has_key?(name)
  end
  all_attributes.each do |name, value|
    element_add_attribute(element, name, value)
  end

  element.stub!(:inner_text).and_return(inner_text)
  element.stub!(:name).and_return(tag)

  element.should_receive(:search).any_number_of_times.with('./*').and_return(search_return)

  # Set a valid return element for each likely id
  ('a'..'z').each do |id|
    found = id_searches.has_key?(id) ? [id_searches[id]] : []
    element.should_receive(:search).any_number_of_times.with("//*[@id='#{id}']").and_return(found)
  end

  element
end
|
metadata
ADDED
@@ -0,0 +1,172 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: mida
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
version: 0.0.0
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- Lawrence Woodman
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2011-04-12 00:00:00 +01:00
|
18
|
+
default_executable:
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
name: nokogiri
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
25
|
+
requirements:
|
26
|
+
- - ">="
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
segments:
|
29
|
+
- 0
|
30
|
+
version: "0"
|
31
|
+
type: :runtime
|
32
|
+
version_requirements: *id001
|
33
|
+
- !ruby/object:Gem::Dependency
|
34
|
+
name: rspec
|
35
|
+
prerelease: false
|
36
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
37
|
+
none: false
|
38
|
+
requirements:
|
39
|
+
- - ">="
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
segments:
|
42
|
+
- 0
|
43
|
+
version: "0"
|
44
|
+
type: :development
|
45
|
+
version_requirements: *id002
|
46
|
+
description: |
|
47
|
+
= Mida
|
48
|
+
|
49
|
+
* {Mida Project Page}[https://github.com/LawrenceWoodman/mida]
|
50
|
+
* {Mida Bug Tracker}[https://github.com/LawrenceWoodman/mida/issues]
|
51
|
+
|
52
|
+
== Description
|
53
|
+
A Microdata[http://en.wikipedia.org/wiki/Microdata_(HTML5)] parser and
|
54
|
+
extractor library for Ruby.
|
55
|
+
This is based on the latest published version of the Microdata Specification
|
56
|
+
dated {5th April 2011}[http://www.w3.org/TR/2011/WD-microdata-20110405/].
|
57
|
+
|
58
|
+
== Installation
|
59
|
+
With Ruby and Rubygems:
|
60
|
+
gem install mida
|
61
|
+
|
62
|
+
=== Requirements:
|
63
|
+
|
64
|
+
* +Nokogiri+
|
65
|
+
|
66
|
+
== Usage
|
67
|
+
The following examples assume that you have required +mida+ and
|
68
|
+
+open-uri+.
|
69
|
+
|
70
|
+
=== Extracting Microdata from a page
|
71
|
+
All the Microdata is extracted from a page when a new <tt>Mida::Document</tt> instance
|
72
|
+
is created.
|
73
|
+
|
74
|
+
To extract all the Microdata from a webpage:
|
75
|
+
url = 'http://example.com'
|
76
|
+
open(url) {|f| doc = Mida::Document.new(f, url)}
|
77
|
+
|
78
|
+
The top-level +Items+ will be held in an array accessible via
|
79
|
+
<tt>doc.items</tt>.
|
80
|
+
|
81
|
+
To simply list all the top-level +Items+ that have been found:
|
82
|
+
puts doc.items
|
83
|
+
|
84
|
+
=== Searching
|
85
|
+
If you want to search for an +Item+ that has a specific +itemtype+/vocabulary
|
86
|
+
this can be done with the +search+ method.
|
87
|
+
|
88
|
+
To return all the +Items+ that use one of Google's Review vocabularies:
|
89
|
+
doc.search(%r{http://data-vocabulary\.org.*?review.*?}i)
|
90
|
+
|
91
|
+
=== Inspecting an +Item+
|
92
|
+
Each +Item+ is a <tt>Mida::Item</tt> instance and has three main methods of
|
93
|
+
interest, +type+, +properties+ and +id+.
|
94
|
+
|
95
|
+
To find out the +itemtype+ of the +Item+:
|
96
|
+
puts doc.items.first.type
|
97
|
+
|
98
|
+
To find out the +itemid+ of the +Item+:
|
99
|
+
puts doc.items.first.id
|
100
|
+
|
101
|
+
Properties are returned as a hash containing name/value pairs. The
|
102
|
+
values will be an array of either +String+ or <tt>Mida::Item</tt> instances.
|
103
|
+
|
104
|
+
To see the +properties+ of the +Item+:
|
105
|
+
puts doc.items.first.properties
|
106
|
+
|
107
|
+
== Bugs/Feature Requests
|
108
|
+
If you find a bug or want to make a feature request, please report it at the
|
109
|
+
Mida project's {issue tracker}[https://github.com/LawrenceWoodman/mida/issues]
|
110
|
+
on GitHub.
|
111
|
+
|
112
|
+
== License
|
113
|
+
Copyright (c) 2011 Lawrence Woodman.
|
114
|
+
This software is licensed under the MIT License. Please see the file, LICENSE.rdoc, for details.
|
115
|
+
|
116
|
+
email: lwoodman@vlifesystems.com
|
117
|
+
executables: []
|
118
|
+
|
119
|
+
extensions: []
|
120
|
+
|
121
|
+
extra_rdoc_files:
|
122
|
+
- README.rdoc
|
123
|
+
- LICENSE.rdoc
|
124
|
+
files:
|
125
|
+
- lib/mida.rb
|
126
|
+
- lib/mida/property.rb
|
127
|
+
- lib/mida/item.rb
|
128
|
+
- lib/mida/document.rb
|
129
|
+
- spec/property_spec.rb
|
130
|
+
- spec/document_spec.rb
|
131
|
+
- spec/item_spec.rb
|
132
|
+
- spec/spec_helper.rb
|
133
|
+
- TODO.rdoc
|
134
|
+
- README.rdoc
|
135
|
+
- LICENSE.rdoc
|
136
|
+
- Rakefile
|
137
|
+
has_rdoc: true
|
138
|
+
homepage: http://github.com/LawrenceWoodman/mida
|
139
|
+
licenses: []
|
140
|
+
|
141
|
+
post_install_message:
|
142
|
+
rdoc_options:
|
143
|
+
- --main
|
144
|
+
- README.rdoc
|
145
|
+
require_paths:
|
146
|
+
- lib
|
147
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
148
|
+
none: false
|
149
|
+
requirements:
|
150
|
+
- - ">="
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
segments:
|
153
|
+
- 1
|
154
|
+
- 9
|
155
|
+
version: "1.9"
|
156
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
157
|
+
none: false
|
158
|
+
requirements:
|
159
|
+
- - ">="
|
160
|
+
- !ruby/object:Gem::Version
|
161
|
+
segments:
|
162
|
+
- 0
|
163
|
+
version: "0"
|
164
|
+
requirements: []
|
165
|
+
|
166
|
+
rubyforge_project:
|
167
|
+
rubygems_version: 1.3.7
|
168
|
+
signing_key:
|
169
|
+
specification_version: 3
|
170
|
+
summary: A Microdata parser
|
171
|
+
test_files: []
|
172
|
+
|