embulk-filter-crawler 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/build.gradle +1 -1
- data/src/main/java/org/embulk/filter/crawler/CrawlerFilterPlugin.java +2 -2
- metadata +3 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: c5cbf3c1ddfcf0dcbcee9ec16bafd4cef9e142b3
|
|
4
|
+
data.tar.gz: ae351e9394e465c0a4470001f5e586c192a55a61
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: ed78d7414aec7f5b0a40f763448471dbd9e470b51b9d113c9637561a1d806acd02c2410a5727392b5ed9e966b764a928cce6bc69541c331244fb68a35df0ed9d
|
|
7
|
+
data.tar.gz: 9ceaf01ba37e0c223d3f6fee34b80c31412a37d3b92bf3da64a8ad5786dc66783ddfa97ff376cec69a9f0dca6e8f37e0b8338e9b125e73ad6a7dbe1f76d9f0e6
|
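data/build.gradle
CHANGED
|
[build.gradle hunk missing from this extract]
|
data/src/main/java/org/embulk/filter/crawler/CrawlerFilterPlugin.java
CHANGED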
|
@@ -45,7 +45,7 @@ public class CrawlerFilterPlugin
|
|
|
45
45
|
public Optional<Integer> getMaxDepthOfCrawling();
|
|
46
46
|
|
|
47
47
|
@Config("number_of_crawlers")
|
|
48
|
-
@ConfigDefault("
|
|
48
|
+
@ConfigDefault("2")
|
|
49
49
|
public int getNumberOfCrawlers();
|
|
50
50
|
|
|
51
51
|
@Config("max_pages_to_fetch")
|
|
@@ -161,7 +161,7 @@ public class CrawlerFilterPlugin
|
|
|
161
161
|
customData.put("should_not_visit_pattern", task.getShouldNotVisitPattern().get());
|
|
162
162
|
}
|
|
163
163
|
controller.setCustomData(customData);
|
|
164
|
-
controller.start(EmbulkCrawler.class, task.getNumberOfCrawlers());
|
|
164
|
+
controller.start(EmbulkCrawler.class, task.getNumberOfCrawlers() < 2 ? 2 : task.getNumberOfCrawlers());
|
|
165
165
|
for (Object object : controller.getCrawlersLocalData()) {
|
|
166
166
|
CrawlStat crawlStat = (CrawlStat) object;
|
|
167
167
|
for (Map<String, Object> map : crawlStat.getPages()) {
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: embulk-filter-crawler
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.2
|
|
4
|
+
version: 0.1.3
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- toyama0919
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2016-04-
|
|
11
|
+
date: 2016-04-06 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -73,7 +73,7 @@ files:
|
|
|
73
73
|
- classpath/commons-logging-1.2.jar
|
|
74
74
|
- classpath/crawler4j-4.2.jar
|
|
75
75
|
- classpath/dom4j-1.6.1.jar
|
|
76
|
-
- classpath/embulk-filter-crawler-0.1.2.jar
|
|
76
|
+
- classpath/embulk-filter-crawler-0.1.3.jar
|
|
77
77
|
- classpath/fontbox-1.8.4.jar
|
|
78
78
|
- classpath/geronimo-stax-api_1.0_spec-1.0.1.jar
|
|
79
79
|
- classpath/httpclient-4.4.jar
|