cocoa-xml 0.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,6 @@
1
+ === 0.4.3 / 2010-01-26
2
+
3
+ * 1 major enhancement
4
+
5
+ * Birthday!
6
+
@@ -0,0 +1,20 @@
1
+ History.rdoc
2
+ Manifest.txt
3
+ README.rdoc
4
+ Rakefile
5
+ lib/cocoa-xml.rb
6
+ lib/cocoa-xml/nodeset.rb
7
+ lib/cocoa-xml/version.rb
8
+ lib/cocoa-xml/nsxmlnode_extras.rb
9
+ lib/cocoa-xml/nsxmldocument_extras.rb
10
+ lib/nokogiri/css.rb
11
+ lib/nokogiri/syntax_error.rb
12
+ lib/nokogiri/css/generated_parser.rb
13
+ lib/nokogiri/css/generated_tokenizer.rb
14
+ lib/nokogiri/css/node.rb
15
+ lib/nokogiri/css/parser.rb
16
+ lib/nokogiri/css/parser.y
17
+ lib/nokogiri/css/syntax_error.rb
18
+ lib/nokogiri/css/tokenizer.rb
19
+ lib/nokogiri/css/tokenizer.rex
20
+ lib/nokogiri/css/xpath_visitor.rb
@@ -0,0 +1,105 @@
1
+ = Cocoa-XML
2
+
3
+ * http://github.com/cehoffman/cocoa-xml
4
+ * http://cehoffman.github.com/cocoa-xml
5
+
6
+ == DESCRIPTION:
7
+
8
+ Cocoa-xml provides a more Ruby-like interface to Cocoa's NSXMLDocument
9
+ and classes that inherit from NSXMLNode. It provides access to XPath,
10
+ XQuery, and CSS selectors for searching documents.
11
+
12
+ == FEATURES/PROBLEMS:
13
+
14
+ * Cocoa-xml uses native Cocoa XML processing facilities.
15
+ * CSS, XPath, and XQuery can be used to search a document
16
+ * Broken HTML documents can be processed thanks to NSXMLDocument
17
+
18
+ == SYNOPSIS:
19
+
20
+ require 'cocoa-xml'
21
+
22
+ doc = CocoaXML::HTML("http://www.google.com/search?q=cehoffman")
23
+
24
+ doc.css('h3.r a.l').each do |link|
25
+ puts link.text
26
+ end
27
+
28
+ doc.xpath('//h3/a[@class="l"]').each do |link|
29
+ puts link.text
30
+ end
31
+
32
+ doc.xquery('data(//h3/a[@class="l"])').each do |link|
33
+ puts link
34
+ end
35
+
36
+ == REQUIREMENTS:
37
+
38
+ * MacRuby 0.6 (development version)
39
+ * OS X Snow Leopard (only version supported by MacRuby currently)
40
+
41
+ == INSTALL:
42
+
43
+ $ sudo gem install cocoa-xml
44
+
45
+ == DEVELOPERS:
46
+
47
+ It is advised to use the current development version of {MacRuby}[http://www.macruby.com] from
48
+ the source tree.
49
+
50
+ == CREDITS:
51
+
52
+ CSS selector support is taken from Nokogiri.
53
+
54
+ == LICENSE:
55
+
56
+ (The MIT License)
57
+
58
+ Copyright (c) 2010 Chris Hoffman
59
+
60
+ Permission is hereby granted, free of charge, to any person obtaining
61
+ a copy of this software and associated documentation files (the
62
+ 'Software'), to deal in the Software without restriction, including
63
+ without limitation the rights to use, copy, modify, merge, publish,
64
+ distribute, sublicense, and/or sell copies of the Software, and to
65
+ permit persons to whom the Software is furnished to do so, subject to
66
+ the following conditions:
67
+
68
+ The above copyright notice and this permission notice shall be
69
+ included in all copies or substantial portions of the Software.
70
+
71
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
72
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
73
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
74
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
75
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
76
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
77
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
78
+
79
+ === Nokogiri License
80
+
81
+ (The MIT License)
82
+
83
+ Copyright (c) 2008 - 2009:
84
+
85
+ * {Aaron Patterson}[http://tenderlovemaking.com]
86
+ * {Mike Dalessio}[http://mike.daless.io]
87
+
88
+ Permission is hereby granted, free of charge, to any person obtaining
89
+ a copy of this software and associated documentation files (the
90
+ 'Software'), to deal in the Software without restriction, including
91
+ without limitation the rights to use, copy, modify, merge, publish,
92
+ distribute, sublicense, and/or sell copies of the Software, and to
93
+ permit persons to whom the Software is furnished to do so, subject to
94
+ the following conditions:
95
+
96
+ The above copyright notice and this permission notice shall be
97
+ included in all copies or substantial portions of the Software.
98
+
99
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
100
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
101
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
102
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
103
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
104
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
105
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,23 @@
1
# -*- ruby -*-

require 'rubygems'
require 'hoe'
require "./lib/cocoa-xml/version.rb"

# Activate the Hoe plugins this project relies on, in the original order.
[:gemcutter, :clean, :git, :yard].each do |plugin_name|
  Hoe.plugin plugin_name
end

# Gem specification. The version is read from lib/cocoa-xml/version.rb,
# which is required above.
Hoe.spec('cocoa-xml') do
  self.version = ::CocoaXML::Version
  developer 'Chris Hoffman', 'cehoffman@gmail.com'

  self.rubyforge_name = 'cocoa-xml'

  # Settings consumed by the :yard plugin
  self.yard_title  = "Cocoa-XML"
  self.yard_markup = 'rdoc'
  self.yard_opts   = ['--no-private']
end

# vim: syntax=ruby
@@ -0,0 +1,48 @@
1
+ framework 'Cocoa'
2
+
3
+ require "cocoa-xml/version"
4
+ require "cocoa-xml/nodeset"
5
+ require "cocoa-xml/nsxmldocument_extras"
6
+ require "cocoa-xml/nsxmlnode_extras"
7
+ require "nokogiri/css"
8
+
9
module CocoaXML
  # Parse an input HTML source
  #
  # @param [url, NSURL, #read, #to_str] source a url as a string or NSURL,
  #   object that responds to #read, or #to_str
  # @return [NSXMLDocument] An NSXMLDocument set to interpret source as HTML
  def self.HTML(source)
    parse source, NSXMLDocumentTidyHTML
  end

  # Parse an input XML source
  #
  # @param [url, NSURL, #read, #to_str] source a url as a string or NSURL,
  #   object that responds to #read, or #to_str
  # @return [NSXMLDocument] An NSXMLDocument set to interpret source as XML
  def self.XML(source)
    parse source, NSXMLDocumentTidyXML
  end

  # Parse an HTML or XML source
  #
  # Meant for internal use by {HTML} and {XML}. A bare +private+ keyword has
  # no effect on methods defined with +def self.+, so this method has always
  # been publicly callable and is left that way for backward compatibility.
  #
  # @param [url, NSURL, #read, #to_str] source a url as a string or NSURL,
  #   object that responds to #read, or #to_str
  # @param [Number] type Constant determining how to interpret input source,
  #   xml or html
  # @return [NSXMLDocument]
  def self.parse(source, type)
    error = Pointer.new :object

    # An NSURL is used as-is; anything string-like is offered to NSURL.
    # The two cases are decided separately so that an NSURL (which need not
    # respond to #to_str) is not skipped.
    url = if source.is_a?(NSURL)
            source
          elsif source.respond_to?(:to_str)
            NSURL.URLWithString(source.to_str)
          end
    source = source.read if source.respond_to?(:read)

    if url
      ::NSXMLDocument.alloc.initWithContentsOfURL url, options: type, error: error
    else
      # No URL could be obtained: treat the source itself as the markup
      ::NSXMLDocument.alloc.initWithXMLString source, options: type, error: error
    end
  end
end
@@ -0,0 +1,57 @@
1
module CocoaXML
  # An Array of nodes (and, for XQuery results, possibly plain values such as
  # Strings) that forwards searches and attribute lookups to every element.
  class NodeSet < Array
    # Collect all the texts of nodes in set and join together
    #
    # Elements responding to #to_str contribute that string; every other
    # element contributes its #text.
    #
    # @return [String] single string containing text of each element
    def text
      map { |node| (node.respond_to?(:to_str) && node.to_str) || node.text }.flatten.join
    end
    alias :inner_text :text

    # Collect all the #to_s representations of elements in array
    #
    # @return [String] single string containing #to_s of each element
    def to_s
      map { |node| node.to_s }.join
    end

    # Perform selector on each element in set
    #
    # @param [String] selector css selector to use on each node
    # @return [NodeSet<NSXMLNode, String>] new set resulting from
    #   performing selector on each node in set
    # @todo This will bomb if the node has String elements in it
    def css(selector)
      # xpath_for yields one XPath per selector of a group ("a, b"); they are
      # combined with the XPath union operator so the result stays valid.
      xpath ::Nokogiri::CSS::xpath_for(selector, :prefix => ".//").join(' | ')
    end

    # Perform XQuery on each node in set
    #
    # @param [String] query xquery to perform on each node in set; the
    #   string itself is left untouched
    # @return [NodeSet<NSXMLNode, String>] results of performing
    #   query
    # @todo This will bomb if the node has String elements in it
    def xquery(query)
      # Root searches to start from nodes. Work on a copy so the caller's
      # string is not modified (and frozen strings are accepted).
      relative = query.sub(%r{^//}, '')
      NodeSet.new(map { |node| node.xquery(relative) }.flatten)
    end

    # Perform XPath selection on each node in set
    #
    # @param [String] query xpath to follow on each node; the string itself
    #   is left untouched
    # @return [NodeSet<NSXMLNode>] results of path on each node
    # @todo This will bomb if the node has String elements in it
    def xpath(query)
      # Root searches to start from nodes. Work on a copy so the caller's
      # string is not modified (and frozen strings are accepted).
      relative = query.sub(%r{^//}, '')
      NodeSet.new(map { |node| node.xpath(relative) }.flatten)
    end

    # Get the value of the attribute for each node in set
    #
    # @param [String] attr attribute to search for on each node
    # @return [Array<String, nil>] value of attribute for each node
    def [](attr)
      map { |node| node[attr] }.flatten
    end
  end
end
@@ -0,0 +1,48 @@
1
module CocoaXML
  # Convenience methods mixed into NSXMLDocument.
  module NSXMLDocumentExtras
    # Hook run when the module is included: gives the including class
    # +encoding+ / +encoding=+ as aliases of its own
    # +characterEncoding+ / +setCharacterEncoding+.
    #
    # @private
    def self.included(klass)
      klass.class_eval {
        alias :encoding :characterEncoding
        alias :encoding= :setCharacterEncoding
      }
    end

    # Encoding of document
    #
    # @see http://www.iana.org/assignments/character-sets Valid Encoding Specifiers
    #
    # @return [NSString] encoding of document
    def encoding
      # Documentation placeholder: the including class gets the real method
      # as an alias of characterEncoding (see .included)
    end

    # Set encoding of document
    #
    # @see file:///Developer/Documentation/DocSets/com.apple.adc.documentation.AppleSnowLeopard.CoreReference.docset/Contents/Resources/Documents/documentation/Cocoa/Reference/Foundation/Classes/NSXMLDocument_Class/Reference/Reference.html#//apple_ref/occ/instm/NSXMLDocument/setCharacterEncoding: Developer Documentation
    # @see http://developer.apple.com/mac/library/documentation/Cocoa/Reference/Foundation/Classes/NSXMLDocument_Class/NSXMLDocument_Class.pdf Apple NSXMLDocument PDF
    #
    # @param [NSString] enc valid character encoding
    # @return [void]
    def encoding=(enc)
      # Documentation placeholder: the including class gets the real method
      # as an alias of setCharacterEncoding (see .included)
    end

    # Whether the document's output is treated as HTML, e.g. <br> style tags
    #
    # @return [Boolean] true when documentContentKind is NSXMLDocumentHTMLKind
    def html?
      NSXMLDocumentHTMLKind == documentContentKind
    end

    # Whether the document's output is treated as XHTML, e.g. <br/> style tags
    #
    # @return [Boolean] true when documentContentKind is NSXMLDocumentXHTMLKind
    def xhtml?
      NSXMLDocumentXHTMLKind == documentContentKind
    end

    # Whether the document's output is treated as XML
    #
    # @return [Boolean] true when documentContentKind is NSXMLDocumentXMLKind
    def xml?
      NSXMLDocumentXMLKind == documentContentKind
    end
  end
end
47
+
48
+ ::NSXMLDocument.send :include, CocoaXML::NSXMLDocumentExtras
@@ -0,0 +1,106 @@
1
module CocoaXML
  # These are a set of methods added onto the Cocoa NSXMLNode class and children.
  module NSXMLNodeExtras
    # Search from this node down using a css selector
    #
    # @param [String] selector selector used to select nodes from document
    # @return [NodeSet<NSXMLNode>] array of nodes matched by selector
    def css(selector)
      # xpath_for yields one XPath per selector of a group ("a, b"); they are
      # combined with the XPath union operator so the result stays valid.
      xpath ::Nokogiri::CSS::xpath_for(selector, :prefix => ".//").join(' | ')
    end

    # Search document using provided path
    #
    # @param [String] path path used to select nodes from document
    # @return [NodeSet<NSXMLNode>, nil] array of nodes matched by path, or
    #   nil when the lookup reported an error
    def xpath(path)
      error = Pointer.new(:object)
      results = nodesForXPath path, error: error

      return NodeSet.new(results) if error[0].nil?

      # TODO: Do something with the error
      nil
    end

    # Process document using provided query
    #
    # @param [String] query query used process information in document
    # @return [NodeSet<NSXMLNode, String>, nil] results depends on
    #   query. Notice that unlike {#xpath} basic types can also be returned.
    #   nil is returned when the query reported an error.
    def xquery(query)
      error = Pointer.new(:object)
      results = objectsForXQuery query, error: error

      return NodeSet.new(results) if error[0].nil?

      # TODO: Do something with the error
      nil
    end

    # Get the contained text from this node and children nodes
    #
    # @return [String] text contents of node
    def text
      xquery('data(.)').join
    end
    alias :inner_text :text

    # Get the value of an attribute of node
    #
    # @param [String] attr attribute of node to query
    # @return [String, nil] string value of attribute or nil if no attribute
    def [](attr)
      xquery("data(@#{attr})").pop
    end

    # Set the value of an attribute of node
    #
    # Updates the attribute in place when the node already has one with that
    # name, otherwise adds a new attribute. Exactly one of the two happens.
    #
    # @param [String] attr attribute of node to set
    # @param [#to_s] value value to set attribute to
    # @return [Object] with assignment syntax (<tt>node[attr] = value</tt>)
    #   Ruby evaluates the expression to +value+ regardless of what this
    #   method returns
    def []=(attr, value)
      name = attr.to_s
      string = value.to_s
      existing = attributeForName(name)

      # Explicit branches: the result of setStringValue must not decide
      # whether a second attribute is added.
      if existing
        existing.setStringValue(string)
      else
        addAttribute(::NSXMLNode.attributeWithName(name, stringValue: string))
      end
    end

    # @private
    def attribute(attr)
      attributeForName(attr.to_s)
    end

    # @return [String] the xml of this node including children nodes with proper indentation
    def to_s
      XMLStringWithOptions(NSXMLNodePrettyPrint)
    end

    # Hook run when the module is included: aliases +remove+/+unlink+ to
    # +detach+ and +path+ to +XPath+ on the including class, and wraps
    # +children+ so it returns a NodeSet.
    #
    # @private
    def self.included(klass)
      klass.class_eval do
        alias :remove :detach
        alias :unlink :detach
        alias :path :XPath

        # TODO: Why is this not working
        alias :old_children :children
        def children
          NodeSet.new(old_children)
        end
      end
    end

    # Remove this node from its parent
    #
    # @return [self] this node
    def remove
      # Implemented as an alias to detach
    end
    alias :unlink :remove

    # An XPath formula to reach this node
    #
    # @return [String] XPath to this node
    def path
      # Implemented as an alias to :XPath
    end
  end
end
105
+
106
+ ::NSXMLNode.send :include, CocoaXML::NSXMLNodeExtras
@@ -0,0 +1,3 @@
1
module CocoaXML
  # Version string of the cocoa-xml gem
  Version = "0.4.4"
end
@@ -0,0 +1,27 @@
1
+ require 'nokogiri/css/node'
2
+ require 'nokogiri/css/xpath_visitor'
3
+ require 'nokogiri/css/generated_parser'
4
+ require 'nokogiri/css/generated_tokenizer'
5
+ require 'nokogiri/css/tokenizer'
6
+ require 'nokogiri/css/parser'
7
+ require 'nokogiri/css/syntax_error'
8
+
9
module Nokogiri
  # Modules to convert CSS selectors to valid XPath
  # @see http://nokogiri.org/Nokogiri/CSS.html Nokogiri Documentation
  module CSS
    # Both entry points are defined with +def self.+ rather than inside a
    # +class << self+ block.

    ###
    # Parse the CSS selector given in +selector+ with a fresh Parser.
    # Returns an AST.
    def self.parse(selector)
      parser = Parser.new
      parser.parse(selector)
    end

    ###
    # Get the XPath for +selector+. The Parser is built with
    # <tt>options[:ns]</tt> (an empty Hash when absent) and +options+ is
    # passed on to Parser#xpath_for unchanged.
    def self.xpath_for(selector, options = {})
      namespaces = options[:ns] || {}
      Parser.new(namespaces).xpath_for(selector, options)
    end
  end
end