embulk-filter-crawler 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/build.gradle +1 -1
- data/src/main/java/org/embulk/filter/crawler/CrawlerFilterPlugin.java +2 -2
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c5cbf3c1ddfcf0dcbcee9ec16bafd4cef9e142b3
|
4
|
+
data.tar.gz: ae351e9394e465c0a4470001f5e586c192a55a61
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ed78d7414aec7f5b0a40f763448471dbd9e470b51b9d113c9637561a1d806acd02c2410a5727392b5ed9e966b764a928cce6bc69541c331244fb68a35df0ed9d
|
7
|
+
data.tar.gz: 9ceaf01ba37e0c223d3f6fee34b80c31412a37d3b92bf3da64a8ad5786dc66783ddfa97ff376cec69a9f0dca6e8f37e0b8338e9b125e73ad6a7dbe1f76d9f0e6
|
data/build.gradle
CHANGED
(hunk not shown)
data/src/main/java/org/embulk/filter/crawler/CrawlerFilterPlugin.java
CHANGED
@@ -45,7 +45,7 @@ public class CrawlerFilterPlugin
|
|
45
45
|
public Optional<Integer> getMaxDepthOfCrawling();
|
46
46
|
|
47
47
|
@Config("number_of_crawlers")
|
48
|
-
@ConfigDefault("
|
48
|
+
@ConfigDefault("2")
|
49
49
|
public int getNumberOfCrawlers();
|
50
50
|
|
51
51
|
@Config("max_pages_to_fetch")
|
@@ -161,7 +161,7 @@ public class CrawlerFilterPlugin
|
|
161
161
|
customData.put("should_not_visit_pattern", task.getShouldNotVisitPattern().get());
|
162
162
|
}
|
163
163
|
controller.setCustomData(customData);
|
164
|
-
controller.start(EmbulkCrawler.class, task.getNumberOfCrawlers());
|
164
|
+
controller.start(EmbulkCrawler.class, task.getNumberOfCrawlers() < 2 ? 2 : task.getNumberOfCrawlers());
|
165
165
|
for (Object object : controller.getCrawlersLocalData()) {
|
166
166
|
CrawlStat crawlStat = (CrawlStat) object;
|
167
167
|
for (Map<String, Object> map : crawlStat.getPages()) {
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-crawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- toyama0919
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-04-
|
11
|
+
date: 2016-04-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -73,7 +73,7 @@ files:
|
|
73
73
|
- classpath/commons-logging-1.2.jar
|
74
74
|
- classpath/crawler4j-4.2.jar
|
75
75
|
- classpath/dom4j-1.6.1.jar
|
76
|
-
- classpath/embulk-filter-crawler-0.1.2.jar
|
76
|
+
- classpath/embulk-filter-crawler-0.1.3.jar
|
77
77
|
- classpath/fontbox-1.8.4.jar
|
78
78
|
- classpath/geronimo-stax-api_1.0_spec-1.0.1.jar
|
79
79
|
- classpath/httpclient-4.4.jar
|