rspider 0.8.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,90 @@
1
+ -- MySQL dump 10.11
2
+ --
3
+ -- Host: localhost Database: sphider2
4
+ -- ------------------------------------------------------
5
+ -- Server version 5.0.51a-log
6
+
7
+ /*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;
8
+ /*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */;
9
+ /*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */;
10
+ /*!40101 SET NAMES utf8 */;
11
+ /*!40103 SET @OLD_TIME_ZONE=@@TIME_ZONE */;
12
+ /*!40103 SET TIME_ZONE='+00:00' */;
13
+ /*!40014 SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0 */;
14
+ /*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */;
15
+ /*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */;
16
+ /*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */;
17
+
18
+ --
19
+ -- Table structure for table `htmls`
20
+ --
21
+
22
+ DROP TABLE IF EXISTS `htmls`; -- destructive: an existing `htmls` table and all of its rows are removed before re-creation
23
+ SET @saved_cs_client = @@character_set_client; -- remember the session client charset so it can be restored after the DDL
24
+ SET character_set_client = utf8;
25
+ CREATE TABLE `htmls` ( -- holds one `html` text body per `ukey`, together with its `source` and `url`
26
+ `source` varchar(32) NOT NULL default '',
27
+ `url` varchar(255) NOT NULL default '',
28
+ `url_crc32` bigint(16) NOT NULL default '0', -- presumably a CRC32 checksum of `url` (TODO confirm against the writer code). The (16) is display width only. Not indexed in this table
29
+ `html` text, -- nullable. MySQL TEXT holds at most 65,535 bytes, so longer values are truncated or rejected depending on SQL mode
30
+ `html_crc32` bigint(16) NOT NULL default '0', -- presumably a CRC32 checksum of `html` (TODO confirm)
31
+ `created` bigint(11) NOT NULL default '0', -- presumably a Unix epoch timestamp stored as an integer (TODO confirm unit and time zone)
32
+ `ukey` varchar(128) NOT NULL default '-', -- unique row key. How it is derived is not visible in this file
33
+ PRIMARY KEY (`ukey`),
34
+ KEY `created` (`created`), -- secondary index on `created`
35
+ KEY `source` (`source`) -- secondary index on `source`
36
+ ) ENGINE=MyISAM DEFAULT CHARSET=utf8; -- MyISAM is non-transactional and does not enforce foreign keys
37
+ SET character_set_client = @saved_cs_client; -- restore the client charset saved before the CREATE TABLE
38
+
39
+ --
40
+ -- Table structure for table `url_relations`
41
+ --
42
+
43
+ DROP TABLE IF EXISTS `url_relations`; -- destructive: an existing `url_relations` table and all of its rows are removed before re-creation
44
+ SET @saved_cs_client = @@character_set_client; -- remember the session client charset so it can be restored after the DDL
45
+ SET character_set_client = utf8;
46
+ CREATE TABLE `url_relations` ( -- one row per (`referer`, `url`) pair, identified by a surrogate `id`
47
+ `id` bigint(11) NOT NULL auto_increment, -- surrogate primary key. The (11) is display width only
48
+ `referer` varchar(255) NOT NULL default '-', -- presumably the page on which `url` was found (TODO confirm). Default is a dash placeholder
49
+ `url` varchar(255) NOT NULL default '-',
50
+ `referer_crc32` bigint(11) NOT NULL, -- presumably a CRC32 checksum of `referer` (TODO confirm). No default is declared, unlike the crc32 columns in `htmls` and `urls`
51
+ `url_crc32` bigint(11) NOT NULL, -- presumably a CRC32 checksum of `url` (TODO confirm). No default is declared
52
+ PRIMARY KEY (`id`),
53
+ KEY `idx_referer_crc32` (`referer_crc32`), -- non-unique index on the numeric checksum column rather than on the 255-character string
54
+ KEY `idx_url_crc32` (`url_crc32`) -- non-unique index on the numeric checksum column rather than on the 255-character string
55
+ ) ENGINE=MyISAM AUTO_INCREMENT=11089 DEFAULT CHARSET=utf8; -- AUTO_INCREMENT=11089 is the counter carried over from the dumped database, so the first new id after a restore is 11089
56
+ SET character_set_client = @saved_cs_client; -- restore the client charset saved before the CREATE TABLE
57
+
58
+ --
59
+ -- Table structure for table `urls`
60
+ --
61
+
62
+ DROP TABLE IF EXISTS `urls`; -- destructive: an existing `urls` table and all of its rows are removed before re-creation
63
+ SET @saved_cs_client = @@character_set_client; -- remember the session client charset so it can be restored after the DDL
64
+ SET character_set_client = utf8;
65
+ CREATE TABLE `urls` ( -- one row per `ukey`, recording a `url` with its `added` and `visited` values, `score` and `errors`
66
+ `source` varchar(32) NOT NULL default '',
67
+ `url` varchar(256) NOT NULL default '', -- NOTE(review): one character wider than the varchar(255) `url` columns in `htmls` and `url_relations`
68
+ `added` bigint(11) default NULL, -- nullable. Presumably a Unix epoch timestamp (TODO confirm)
69
+ `visited` bigint(11) default NULL, -- nullable. Presumably a Unix epoch timestamp, with NULL meaning not yet visited (TODO confirm)
70
+ `ukey` varchar(128) NOT NULL default '-', -- unique row key. How it is derived is not visible in this file
71
+ `score` int(4) default NULL, -- nullable. The (4) is display width only and does not limit the stored range
72
+ `errors` int(4) default '0', -- nullable despite the zero default. Presumably a failure counter (TODO confirm)
73
+ `url_crc32` bigint(11) NOT NULL default '0', -- presumably a CRC32 checksum of `url` (TODO confirm). Not indexed here, unlike in `url_relations`
74
+ PRIMARY KEY (`ukey`),
75
+ KEY `visited` (`visited`), -- secondary index on `visited`
76
+ KEY `added` (`added`), -- secondary index on `added`
77
+ KEY `source` (`source`) -- secondary index on `source`
78
+ ) ENGINE=MyISAM DEFAULT CHARSET=utf8; -- MyISAM is non-transactional and does not enforce foreign keys
79
+ SET character_set_client = @saved_cs_client; -- restore the client charset saved before the CREATE TABLE
80
+ /*!40103 SET TIME_ZONE=@OLD_TIME_ZONE */;
81
+
82
+ /*!40101 SET SQL_MODE=@OLD_SQL_MODE */;
83
+ /*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */;
84
+ /*!40014 SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS */;
85
+ /*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */;
86
+ /*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */;
87
+ /*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */;
88
+ /*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */;
89
+
90
+ -- Dump completed on 2008-09-08 16:22:18
metadata ADDED
@@ -0,0 +1,73 @@
1
+ --- !ruby/object:Gem::Specification
2
+ rubygems_version: 0.9.4
3
+ specification_version: 1
4
+ name: rspider
5
+ version: !ruby/object:Gem::Version
6
+ version: 0.8.4
7
+ date: 2008-09-15 00:00:00 +08:00
8
+ summary: Web crawler
9
+ require_paths:
10
+ - lib
11
+ email: xurenlu@gmail.com
12
+ homepage: http://www.162cm.com/
13
+ rubyforge_project: rspider
14
+ description:
15
+ autorequire:
16
+ default_executable:
17
+ bindir: bin
18
+ has_rdoc: true
19
+ required_ruby_version: !ruby/object:Gem::Version::Requirement
20
+ requirements:
21
+ - - ">"
22
+ - !ruby/object:Gem::Version
23
+ version: 0.0.0
24
+ version:
25
+ platform: ruby
26
+ signing_key:
27
+ cert_chain:
28
+ post_install_message:
29
+ authors:
30
+ - Renlu Xu
31
+ files:
32
+ - lib/rspider/UrlScorer.rb
33
+ - lib/rspider/UrlStorage.rb
34
+ - lib/rspider/mysql.rb
35
+ - lib/rspider/MysqlUrlRelationStorage.rb
36
+ - lib/rspider/Logger.rb
37
+ - lib/rspider/cookie.rb
38
+ - lib/rspider/RobotRules.rb
39
+ - lib/rspider/SiteLocker.rb
40
+ - lib/rspider/UrlDispatcher.rb
41
+ - lib/rspider/ThreadPool.rb
42
+ - lib/rspider/OptParser.rb
43
+ - lib/rspider/DataWasher.rb
44
+ - lib/rspider/HtmlTidy.rb
45
+ - lib/rspider/ContentStorage.rb
46
+ - lib/rspider/Spider.rb
47
+ - lib/rspider/DocumentExtractor.rb
48
+ - lib/rspider/browser.rb
49
+ - lib/rspider/links.rb
50
+ - lib/rspider/ConfParser.rb
51
+ - lib/rspider/MysqlUrlStorage.rb
52
+ - lib/rspider/Document.rb
53
+ - lib/rspider.rb
54
+ - sql/db.sql
55
+ - Changelog
56
+ - ToDo
57
+ - conf/local.conf
58
+ - Rakefile
59
+ test_files: []
60
+
61
+ rdoc_options: []
62
+
63
+ extra_rdoc_files: []
64
+
65
+ executables:
66
+ - linkcheck.rb
67
+ - main.rb
68
+ extensions: []
69
+
70
+ requirements: []
71
+
72
+ dependencies: []
73
+