rspider 0.8.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,90 @@
1
+ -- MySQL dump 10.11
2
+ --
3
+ -- Host: localhost Database: sphider2
4
+ -- ------------------------------------------------------
5
+ -- Server version 5.0.51a-log
6
+
7
+ /*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;
8
+ /*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */;
9
+ /*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */;
10
+ /*!40101 SET NAMES utf8 */;
11
+ /*!40103 SET @OLD_TIME_ZONE=@@TIME_ZONE */;
12
+ /*!40103 SET TIME_ZONE='+00:00' */;
13
+ /*!40014 SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0 */;
14
+ /*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */;
15
+ /*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */;
16
+ /*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */;
17
+
18
+ --
19
+ -- Table structure for table `htmls`
20
+ --
21
+
22
+ DROP TABLE IF EXISTS `htmls`;
23
+ SET @saved_cs_client = @@character_set_client;
24
+ SET character_set_client = utf8;
25
+ CREATE TABLE `htmls` (
26
+ `source` varchar(32) NOT NULL default '',
27
+ `url` varchar(255) NOT NULL default '',
28
+ `url_crc32` bigint(16) NOT NULL default '0',
29
+ `html` text,
30
+ `html_crc32` bigint(16) NOT NULL default '0',
31
+ `created` bigint(11) NOT NULL default '0',
32
+ `ukey` varchar(128) NOT NULL default '-',
33
+ PRIMARY KEY (`ukey`),
34
+ KEY `created` (`created`),
35
+ KEY `source` (`source`)
36
+ ) ENGINE=MyISAM DEFAULT CHARSET=utf8;
37
+ SET character_set_client = @saved_cs_client;
38
+
39
+ --
40
+ -- Table structure for table `url_relations`
41
+ --
42
+
43
+ DROP TABLE IF EXISTS `url_relations`;
44
+ SET @saved_cs_client = @@character_set_client;
45
+ SET character_set_client = utf8;
46
+ CREATE TABLE `url_relations` (
47
+ `id` bigint(11) NOT NULL auto_increment,
48
+ `referer` varchar(255) NOT NULL default '-',
49
+ `url` varchar(255) NOT NULL default '-',
50
+ `referer_crc32` bigint(11) NOT NULL,
51
+ `url_crc32` bigint(11) NOT NULL,
52
+ PRIMARY KEY (`id`),
53
+ KEY `idx_referer_crc32` (`referer_crc32`),
54
+ KEY `idx_url_crc32` (`url_crc32`)
55
+ ) ENGINE=MyISAM AUTO_INCREMENT=11089 DEFAULT CHARSET=utf8;
56
+ SET character_set_client = @saved_cs_client;
57
+
58
+ --
59
+ -- Table structure for table `urls`
60
+ --
61
+
62
+ DROP TABLE IF EXISTS `urls`;
63
+ SET @saved_cs_client = @@character_set_client;
64
+ SET character_set_client = utf8;
65
+ CREATE TABLE `urls` (
66
+ `source` varchar(32) NOT NULL default '',
67
+ `url` varchar(256) NOT NULL default '',
68
+ `added` bigint(11) default NULL,
69
+ `visited` bigint(11) default NULL,
70
+ `ukey` varchar(128) NOT NULL default '-',
71
+ `score` int(4) default NULL,
72
+ `errors` int(4) default '0',
73
+ `url_crc32` bigint(11) NOT NULL default '0',
74
+ PRIMARY KEY (`ukey`),
75
+ KEY `visited` (`visited`),
76
+ KEY `added` (`added`),
77
+ KEY `source` (`source`)
78
+ ) ENGINE=MyISAM DEFAULT CHARSET=utf8;
79
+ SET character_set_client = @saved_cs_client;
80
+ /*!40103 SET TIME_ZONE=@OLD_TIME_ZONE */;
81
+
82
+ /*!40101 SET SQL_MODE=@OLD_SQL_MODE */;
83
+ /*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */;
84
+ /*!40014 SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS */;
85
+ /*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */;
86
+ /*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */;
87
+ /*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */;
88
+ /*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */;
89
+
90
+ -- Dump completed on 2008-09-08 16:22:18
metadata ADDED
@@ -0,0 +1,73 @@
1
+ --- !ruby/object:Gem::Specification
2
+ rubygems_version: 0.9.4
3
+ specification_version: 1
4
+ name: rspider
5
+ version: !ruby/object:Gem::Version
6
+ version: 0.8.4
7
+ date: 2008-09-15 00:00:00 +08:00
8
+ summary: Web crawler
9
+ require_paths:
10
+ - lib
11
+ email: xurenlu@gmail.com
12
+ homepage: http://www.162cm.com/
13
+ rubyforge_project: rspider
14
+ description:
15
+ autorequire:
16
+ default_executable:
17
+ bindir: bin
18
+ has_rdoc: true
19
+ required_ruby_version: !ruby/object:Gem::Version::Requirement
20
+ requirements:
21
+ - - ">"
22
+ - !ruby/object:Gem::Version
23
+ version: 0.0.0
24
+ version:
25
+ platform: ruby
26
+ signing_key:
27
+ cert_chain:
28
+ post_install_message:
29
+ authors:
30
+ - Renlu Xu
31
+ files:
32
+ - lib/rspider/UrlScorer.rb
33
+ - lib/rspider/UrlStorage.rb
34
+ - lib/rspider/mysql.rb
35
+ - lib/rspider/MysqlUrlRelationStorage.rb
36
+ - lib/rspider/Logger.rb
37
+ - lib/rspider/cookie.rb
38
+ - lib/rspider/RobotRules.rb
39
+ - lib/rspider/SiteLocker.rb
40
+ - lib/rspider/UrlDispatcher.rb
41
+ - lib/rspider/ThreadPool.rb
42
+ - lib/rspider/OptParser.rb
43
+ - lib/rspider/DataWasher.rb
44
+ - lib/rspider/HtmlTidy.rb
45
+ - lib/rspider/ContentStorage.rb
46
+ - lib/rspider/Spider.rb
47
+ - lib/rspider/DocumentExtractor.rb
48
+ - lib/rspider/browser.rb
49
+ - lib/rspider/links.rb
50
+ - lib/rspider/ConfParser.rb
51
+ - lib/rspider/MysqlUrlStorage.rb
52
+ - lib/rspider/Document.rb
53
+ - lib/rspider.rb
54
+ - sql/db.sql
55
+ - Changelog
56
+ - ToDo
57
+ - conf/local.conf
58
+ - Rakefile
59
+ test_files: []
60
+
61
+ rdoc_options: []
62
+
63
+ extra_rdoc_files: []
64
+
65
+ executables:
66
+ - linkcheck.rb
67
+ - main.rb
68
+ extensions: []
69
+
70
+ requirements: []
71
+
72
+ dependencies: []
73
+