xapian-indexer 1.2.19.1 → 1.2.19.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 72b813e2b0019183e68679aebf13532f607f4adb
4
- data.tar.gz: 3806c150659093f02f1c394b227715790bfbe218
3
+ metadata.gz: 8effe1bf39e7072ee007a20249dd43f0c1c7bbe7
4
+ data.tar.gz: dfd99dfe5b79e17534ec7a26acd89432021183b1
5
5
  SHA512:
6
- metadata.gz: 2583e40e2e9eeecf3247822595e8dbedc09d07fd1bc18ba64c95b72a5ff0413cc0bba51434a69b6ccb61e60669a4f05616d615e3e229f4105512e93315316e12
7
- data.tar.gz: f77f2f725d2fb549b6c0eea955ef709390090916c090e8259c0d94474bbaaa686d340a56d7987711e755591b5e40aa757e9e2e1b88fe0651b753d0304be4b664
6
+ metadata.gz: 847af60f50c2f0da6397b536d0a4cdeea60977cb0945f7719a34d261c4f2bb227744e60af29b70e8a472e553302366fd49fbf683ef4432a4975d416479fca80f
7
+ data.tar.gz: c63cfb012a7f94aed6bb136e3069fa161e0c1e395e664d221a5e1127c4fbeca88de9573f0f6a19798c1cd20903eb5c7a60c28557faa202f4c56e4acaaa6142a0
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Xapian::Indexer
2
2
 
3
- TODO: Write a gem description
3
+ This gem provides basic infrastructure for indexing HTML documents over HTTP into a Xapian database.
4
4
 
5
5
  ## Installation
6
6
 
@@ -86,7 +86,7 @@ The following example shows how to create a spider and index some resources:
86
86
 
87
87
  ## License
88
88
 
89
- Released under the MIT license.
89
+ This code is dual licensed under the MIT license and GPLv3 license.
90
90
 
91
91
  Copyright, 2015, by [Samuel G. D. Williams](http://www.codeotaku.com/samuel-williams).
92
92
 
@@ -1,7 +1,28 @@
1
+ # Copyright, 2015, by Samuel G. D. Williams. <http://www.codeotaku.com>
2
+ #
3
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ # of this software and associated documentation files (the "Software"), to deal
5
+ # in the Software without restriction, including without limitation the rights
6
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ # copies of the Software, and to permit persons to whom the Software is
8
+ # furnished to do so, subject to the following conditions:
9
+ #
10
+ # The above copyright notice and this permission notice shall be included in
11
+ # all copies or substantial portions of the Software.
12
+ #
13
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ # THE SOFTWARE.
1
20
 
2
- require 'xapian/indexer/version'
3
- require 'xapian/indexer/resource'
4
- require 'xapian/indexer/spider'
21
+ require_relative 'indexer/extensions/uri'
22
+
23
+ require_relative 'indexer/version'
24
+ require_relative 'indexer/resource'
25
+ require_relative 'indexer/spider'
5
26
 
6
27
  module Xapian
7
28
  module Indexer
@@ -0,0 +1,41 @@
1
+ # Copyright, 2015, by Samuel G. D. Williams. <http://www.codeotaku.com>
2
+ #
3
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ # of this software and associated documentation files (the "Software"), to deal
5
+ # in the Software without restriction, including without limitation the rights
6
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ # copies of the Software, and to permit persons to whom the Software is
8
+ # furnished to do so, subject to the following conditions:
9
+ #
10
+ # The above copyright notice and this permission notice shall be included in
11
+ # all copies or substantial portions of the Software.
12
+ #
13
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ # THE SOFTWARE.
20
+
21
+ require 'uri'
22
+
23
+ class URI::Generic
24
+ # This change allows you to merge relative URLs which otherwise isn't possible.
25
+ def merge0(oth)
26
+ oth = parser.send(:convert_to_uri, oth)
27
+
28
+ if self.absolute? && oth.absolute?
29
+ #raise BadURIError,
30
+ # "both URI are absolute"
31
+ # hmm... should return oth for usability?
32
+ return oth, oth
33
+ end
34
+
35
+ if self.absolute?
36
+ return self.dup, oth
37
+ else
38
+ return oth, oth
39
+ end
40
+ end
41
+ end
@@ -1,17 +1,22 @@
1
- # Copyright (c) 2010 Samuel Williams. Released under the GNU GPLv3.
2
- #
3
- # This program is free software: you can redistribute it and/or modify
4
- # it under the terms of the GNU General Public License as published by
5
- # the Free Software Foundation, either version 3 of the License, or
6
- # (at your option) any later version.
1
+ # Copyright, 2015, by Samuel G. D. Williams. <http://www.codeotaku.com>
7
2
  #
8
- # This program is distributed in the hope that it will be useful,
9
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
- # GNU General Public License for more details.
3
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ # of this software and associated documentation files (the "Software"), to deal
5
+ # in the Software without restriction, including without limitation the rights
6
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ # copies of the Software, and to permit persons to whom the Software is
8
+ # furnished to do so, subject to the following conditions:
12
9
  #
13
- # You should have received a copy of the GNU General Public License
14
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
10
+ # The above copyright notice and this permission notice shall be included in
11
+ # all copies or substantial portions of the Software.
12
+ #
13
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ # THE SOFTWARE.
15
20
 
16
21
  require 'nokogiri'
17
22
 
@@ -1,20 +1,25 @@
1
- # Copyright (c) 2010 Samuel Williams. Released under the GNU GPLv3.
2
- #
3
- # This program is free software: you can redistribute it and/or modify
4
- # it under the terms of the GNU General Public License as published by
5
- # the Free Software Foundation, either version 3 of the License, or
6
- # (at your option) any later version.
1
+ # Copyright, 2015, by Samuel G. D. Williams. <http://www.codeotaku.com>
7
2
  #
8
- # This program is distributed in the hope that it will be useful,
9
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
- # GNU General Public License for more details.
3
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ # of this software and associated documentation files (the "Software"), to deal
5
+ # in the Software without restriction, including without limitation the rights
6
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ # copies of the Software, and to permit persons to whom the Software is
8
+ # furnished to do so, subject to the following conditions:
12
9
  #
13
- # You should have received a copy of the GNU General Public License
14
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
10
+ # The above copyright notice and this permission notice shall be included in
11
+ # all copies or substantial portions of the Software.
12
+ #
13
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ # THE SOFTWARE.
15
20
 
16
21
  require 'net/http'
17
- require 'xapian/indexer/version'
22
+ require_relative '../version'
18
23
 
19
24
  module Xapian
20
25
  module Indexer
@@ -1,17 +1,22 @@
1
- # Copyright (c) 2010 Samuel Williams. Released under the GNU GPLv3.
2
- #
3
- # This program is free software: you can redistribute it and/or modify
4
- # it under the terms of the GNU General Public License as published by
5
- # the Free Software Foundation, either version 3 of the License, or
6
- # (at your option) any later version.
1
+ # Copyright, 2015, by Samuel G. D. Williams. <http://www.codeotaku.com>
7
2
  #
8
- # This program is distributed in the hope that it will be useful,
9
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
- # GNU General Public License for more details.
3
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ # of this software and associated documentation files (the "Software"), to deal
5
+ # in the Software without restriction, including without limitation the rights
6
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ # copies of the Software, and to permit persons to whom the Software is
8
+ # furnished to do so, subject to the following conditions:
12
9
  #
13
- # You should have received a copy of the GNU General Public License
14
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
10
+ # The above copyright notice and this permission notice shall be included in
11
+ # all copies or substantial portions of the Software.
12
+ #
13
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ # THE SOFTWARE.
15
20
 
16
21
  require 'digest/md5'
17
22
  require 'yaml'
@@ -1,17 +1,22 @@
1
- # Copyright (c) 2010 Samuel Williams. Released under the GNU GPLv3.
2
- #
3
- # This program is free software: you can redistribute it and/or modify
4
- # it under the terms of the GNU General Public License as published by
5
- # the Free Software Foundation, either version 3 of the License, or
6
- # (at your option) any later version.
1
+ # Copyright, 2015, by Samuel G. D. Williams. <http://www.codeotaku.com>
7
2
  #
8
- # This program is distributed in the hope that it will be useful,
9
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
- # GNU General Public License for more details.
3
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ # of this software and associated documentation files (the "Software"), to deal
5
+ # in the Software without restriction, including without limitation the rights
6
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ # copies of the Software, and to permit persons to whom the Software is
8
+ # furnished to do so, subject to the following conditions:
12
9
  #
13
- # You should have received a copy of the GNU General Public License
14
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
10
+ # The above copyright notice and this permission notice shall be included in
11
+ # all copies or substantial portions of the Software.
12
+ #
13
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ # THE SOFTWARE.
15
20
 
16
21
  require 'xapian'
17
22
  require 'set'
@@ -1,20 +1,25 @@
1
- # Copyright (c) 2010 Samuel Williams. Released under the GNU GPLv3.
2
- #
3
- # This program is free software: you can redistribute it and/or modify
4
- # it under the terms of the GNU General Public License as published by
5
- # the Free Software Foundation, either version 3 of the License, or
6
- # (at your option) any later version.
1
+ # Copyright, 2015, by Samuel G. D. Williams. <http://www.codeotaku.com>
7
2
  #
8
- # This program is distributed in the hope that it will be useful,
9
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
- # GNU General Public License for more details.
3
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ # of this software and associated documentation files (the "Software"), to deal
5
+ # in the Software without restriction, including without limitation the rights
6
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ # copies of the Software, and to permit persons to whom the Software is
8
+ # furnished to do so, subject to the following conditions:
12
9
  #
13
- # You should have received a copy of the GNU General Public License
14
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
10
+ # The above copyright notice and this permission notice shall be included in
11
+ # all copies or substantial portions of the Software.
12
+ #
13
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ # THE SOFTWARE.
15
20
 
16
21
  module Xapian
17
22
  module Indexer
18
- VERSION = "1.2.19.1"
23
+ VERSION = "1.2.19.2"
19
24
  end
20
25
  end
@@ -17,7 +17,7 @@ Gem::Specification.new do |spec|
17
17
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
18
18
  spec.require_paths = ["lib"]
19
19
 
20
- spec.add_dependency 'xapian-core'
20
+ spec.add_dependency 'xapian-core', '~> 1.2.19.1'
21
21
  spec.add_dependency 'nokogiri'
22
22
 
23
23
  spec.add_development_dependency "bundler", "~> 1.6"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: xapian-indexer
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.19.1
4
+ version: 1.2.19.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Samuel Williams
@@ -14,16 +14,16 @@ dependencies:
14
14
  name: xapian-core
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ">="
17
+ - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '0'
19
+ version: 1.2.19.1
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ">="
24
+ - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '0'
26
+ version: 1.2.19.1
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: nokogiri
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -78,6 +78,7 @@ files:
78
78
  - README.md
79
79
  - Rakefile
80
80
  - lib/xapian/indexer.rb
81
+ - lib/xapian/indexer/extensions/uri.rb
81
82
  - lib/xapian/indexer/extractors/html.rb
82
83
  - lib/xapian/indexer/loaders/http.rb
83
84
  - lib/xapian/indexer/resource.rb