powerdlz23 1.2.2 → 1.2.4

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (167)
  1. package/Spider/README.md +19 -0
  2. package/Spider/domain.py +18 -0
  3. package/Spider/general.py +51 -0
  4. package/Spider/link_finder.py +25 -0
  5. package/Spider/main.py +50 -0
  6. package/Spider/spider.py +74 -0
  7. package/crawler/.formatter.exs +5 -0
  8. package/crawler/.github/workflows/ci.yml +29 -0
  9. package/crawler/.recode.exs +33 -0
  10. package/crawler/.tool-versions +2 -0
  11. package/crawler/CHANGELOG.md +82 -0
  12. package/crawler/README.md +198 -0
  13. package/crawler/architecture.svg +4 -0
  14. package/crawler/config/config.exs +9 -0
  15. package/crawler/config/dev.exs +5 -0
  16. package/crawler/config/test.exs +5 -0
  17. package/crawler/examples/google_search/scraper.ex +37 -0
  18. package/crawler/examples/google_search/url_filter.ex +11 -0
  19. package/crawler/examples/google_search.ex +77 -0
  20. package/crawler/lib/crawler/dispatcher/worker.ex +14 -0
  21. package/crawler/lib/crawler/dispatcher.ex +20 -0
  22. package/crawler/lib/crawler/fetcher/header_preparer.ex +60 -0
  23. package/crawler/lib/crawler/fetcher/modifier.ex +45 -0
  24. package/crawler/lib/crawler/fetcher/policer.ex +77 -0
  25. package/crawler/lib/crawler/fetcher/recorder.ex +55 -0
  26. package/crawler/lib/crawler/fetcher/requester.ex +32 -0
  27. package/crawler/lib/crawler/fetcher/retrier.ex +43 -0
  28. package/crawler/lib/crawler/fetcher/url_filter.ex +26 -0
  29. package/crawler/lib/crawler/fetcher.ex +81 -0
  30. package/crawler/lib/crawler/http.ex +7 -0
  31. package/crawler/lib/crawler/linker/path_builder.ex +71 -0
  32. package/crawler/lib/crawler/linker/path_expander.ex +59 -0
  33. package/crawler/lib/crawler/linker/path_finder.ex +106 -0
  34. package/crawler/lib/crawler/linker/path_offliner.ex +59 -0
  35. package/crawler/lib/crawler/linker/path_prefixer.ex +46 -0
  36. package/crawler/lib/crawler/linker.ex +173 -0
  37. package/crawler/lib/crawler/options.ex +127 -0
  38. package/crawler/lib/crawler/parser/css_parser.ex +37 -0
  39. package/crawler/lib/crawler/parser/guarder.ex +38 -0
  40. package/crawler/lib/crawler/parser/html_parser.ex +41 -0
  41. package/crawler/lib/crawler/parser/link_parser/link_expander.ex +32 -0
  42. package/crawler/lib/crawler/parser/link_parser.ex +50 -0
  43. package/crawler/lib/crawler/parser.ex +122 -0
  44. package/crawler/lib/crawler/queue_handler.ex +45 -0
  45. package/crawler/lib/crawler/scraper.ex +28 -0
  46. package/crawler/lib/crawler/snapper/dir_maker.ex +45 -0
  47. package/crawler/lib/crawler/snapper/link_replacer.ex +95 -0
  48. package/crawler/lib/crawler/snapper.ex +82 -0
  49. package/crawler/lib/crawler/store/counter.ex +19 -0
  50. package/crawler/lib/crawler/store/page.ex +7 -0
  51. package/crawler/lib/crawler/store.ex +87 -0
  52. package/crawler/lib/crawler/worker.ex +62 -0
  53. package/crawler/lib/crawler.ex +91 -0
  54. package/crawler/mix.exs +78 -0
  55. package/crawler/mix.lock +40 -0
  56. package/crawler/test/fixtures/introducing-elixir.jpg +0 -0
  57. package/crawler/test/integration_test.exs +135 -0
  58. package/crawler/test/lib/crawler/dispatcher/worker_test.exs +7 -0
  59. package/crawler/test/lib/crawler/dispatcher_test.exs +5 -0
  60. package/crawler/test/lib/crawler/fetcher/header_preparer_test.exs +7 -0
  61. package/crawler/test/lib/crawler/fetcher/policer_test.exs +71 -0
  62. package/crawler/test/lib/crawler/fetcher/recorder_test.exs +9 -0
  63. package/crawler/test/lib/crawler/fetcher/requester_test.exs +9 -0
  64. package/crawler/test/lib/crawler/fetcher/retrier_test.exs +7 -0
  65. package/crawler/test/lib/crawler/fetcher/url_filter_test.exs +7 -0
  66. package/crawler/test/lib/crawler/fetcher_test.exs +153 -0
  67. package/crawler/test/lib/crawler/http_test.exs +47 -0
  68. package/crawler/test/lib/crawler/linker/path_builder_test.exs +7 -0
  69. package/crawler/test/lib/crawler/linker/path_expander_test.exs +7 -0
  70. package/crawler/test/lib/crawler/linker/path_finder_test.exs +7 -0
  71. package/crawler/test/lib/crawler/linker/path_offliner_test.exs +7 -0
  72. package/crawler/test/lib/crawler/linker/path_prefixer_test.exs +7 -0
  73. package/crawler/test/lib/crawler/linker_test.exs +7 -0
  74. package/crawler/test/lib/crawler/options_test.exs +7 -0
  75. package/crawler/test/lib/crawler/parser/css_parser_test.exs +7 -0
  76. package/crawler/test/lib/crawler/parser/guarder_test.exs +7 -0
  77. package/crawler/test/lib/crawler/parser/html_parser_test.exs +7 -0
  78. package/crawler/test/lib/crawler/parser/link_parser/link_expander_test.exs +7 -0
  79. package/crawler/test/lib/crawler/parser/link_parser_test.exs +7 -0
  80. package/crawler/test/lib/crawler/parser_test.exs +8 -0
  81. package/crawler/test/lib/crawler/queue_handler_test.exs +7 -0
  82. package/crawler/test/lib/crawler/scraper_test.exs +7 -0
  83. package/crawler/test/lib/crawler/snapper/dir_maker_test.exs +7 -0
  84. package/crawler/test/lib/crawler/snapper/link_replacer_test.exs +7 -0
  85. package/crawler/test/lib/crawler/snapper_test.exs +9 -0
  86. package/crawler/test/lib/crawler/worker_test.exs +5 -0
  87. package/crawler/test/lib/crawler_test.exs +295 -0
  88. package/crawler/test/support/test_case.ex +24 -0
  89. package/crawler/test/support/test_helpers.ex +28 -0
  90. package/crawler/test/test_helper.exs +7 -0
  91. package/package.json +1 -1
  92. package/pto/CryptoNoter/.gitattributes +2 -0
  93. package/pto/CryptoNoter/CryptoNight.md +444 -0
  94. package/pto/CryptoNoter/CryptoNight.txt +364 -0
  95. package/pto/CryptoNoter/LICENSE +21 -0
  96. package/pto/CryptoNoter/README.md +178 -0
  97. package/pto/CryptoNoter/banner +4 -0
  98. package/pto/CryptoNoter/config.json +8 -0
  99. package/pto/CryptoNoter/install.sh +60 -0
  100. package/pto/CryptoNoter/package-lock.json +33 -0
  101. package/pto/CryptoNoter/package.json +16 -0
  102. package/pto/CryptoNoter/server.js +225 -0
  103. package/pto/CryptoNoter/web/demo.html +81 -0
  104. package/pto/CryptoNoter/web/index.html +1 -0
  105. package/pto/CryptoNoter/web/lib/cryptonight-asmjs.min.js +16891 -0
  106. package/pto/CryptoNoter/web/lib/cryptonight-asmjs.min.js.mem +0 -0
  107. package/pto/CryptoNoter/web/lib/cryptonight.wasm +0 -0
  108. package/pto/CryptoNoter/web/processor.js +496 -0
  109. package/pto/CryptoNoter/web/worker.js +5549 -0
  110. package/pto/crypto/README.md +1 -0
  111. package/pto/crypto/aes256cbc/README.md +59 -0
  112. package/pto/crypto/aes256cbc/aes256cbc.go +172 -0
  113. package/pto/crypto/aes256cbc/aes256cbc_test.go +105 -0
  114. package/pto/crypto/aes256cbc/examples_test.go +30 -0
  115. package/pto/crypto/dh64/README.md +84 -0
  116. package/pto/crypto/dh64/c/dh64.c +75 -0
  117. package/pto/crypto/dh64/c/dh64.h +12 -0
  118. package/pto/crypto/dh64/c/dh64_test.c +30 -0
  119. package/pto/crypto/dh64/csharp/dh64.cs +77 -0
  120. package/pto/crypto/dh64/csharp/dh64_test.cs +1074 -0
  121. package/pto/crypto/dh64/go/dh64.go +72 -0
  122. package/pto/crypto/dh64/go/dh64_test.go +1064 -0
  123. package/pto/crypto/mt19937/README.md +30 -0
  124. package/pto/crypto/mt19937/c/mt19937-64.c +180 -0
  125. package/pto/crypto/mt19937/c/mt19937-64.h +96 -0
  126. package/pto/crypto/mt19937/c/mt19937-64.out.txt +401 -0
  127. package/pto/crypto/mt19937/c/mt19937-64test.c +78 -0
  128. package/pto/crypto/mt19937/csharp/mt19937.cs +139 -0
  129. package/pto/crypto/mt19937/csharp/mt19937_test.cs +574 -0
  130. package/pto/crypto/mt19937/go/COPYING +674 -0
  131. package/pto/crypto/mt19937/go/README.rst +103 -0
  132. package/pto/crypto/mt19937/go/doc.go +35 -0
  133. package/pto/crypto/mt19937/go/example.go +32 -0
  134. package/pto/crypto/mt19937/go/mt19937.go +149 -0
  135. package/pto/crypto/mt19937/go/mt19937_test.go +614 -0
  136. package/pto/crypto/rc4/README.md +14 -0
  137. package/pto/crypto/rc4/csharp/rc4.cs +119 -0
  138. package/pto/crypto/rc4/csharp/rc4_echo_client.cs +78 -0
  139. package/pto/crypto/rc4/go/rc4_echo_client.go +102 -0
  140. package/pto/crypto/rc4/go/rc4_echo_server.go +110 -0
  141. package/rubyretriever/.rspec +2 -0
  142. package/rubyretriever/.travis.yml +7 -0
  143. package/rubyretriever/Gemfile +3 -0
  144. package/rubyretriever/Gemfile.lock +64 -0
  145. package/rubyretriever/LICENSE +20 -0
  146. package/rubyretriever/Rakefile +7 -0
  147. package/rubyretriever/bin/rr +79 -0
  148. package/rubyretriever/lib/retriever/cli.rb +25 -0
  149. package/rubyretriever/lib/retriever/core_ext.rb +13 -0
  150. package/rubyretriever/lib/retriever/fetch.rb +268 -0
  151. package/rubyretriever/lib/retriever/fetchfiles.rb +71 -0
  152. package/rubyretriever/lib/retriever/fetchseo.rb +18 -0
  153. package/rubyretriever/lib/retriever/fetchsitemap.rb +43 -0
  154. package/rubyretriever/lib/retriever/link.rb +47 -0
  155. package/rubyretriever/lib/retriever/openuri_redirect_patch.rb +8 -0
  156. package/rubyretriever/lib/retriever/page.rb +104 -0
  157. package/rubyretriever/lib/retriever/page_iterator.rb +21 -0
  158. package/rubyretriever/lib/retriever/target.rb +47 -0
  159. package/rubyretriever/lib/retriever/version.rb +4 -0
  160. package/rubyretriever/lib/retriever.rb +15 -0
  161. package/rubyretriever/readme.md +166 -0
  162. package/rubyretriever/rubyretriever.gemspec +41 -0
  163. package/rubyretriever/spec/link_spec.rb +77 -0
  164. package/rubyretriever/spec/page_spec.rb +94 -0
  165. package/rubyretriever/spec/retriever_spec.rb +84 -0
  166. package/rubyretriever/spec/spec_helper.rb +17 -0
  167. package/rubyretriever/spec/target_spec.rb +55 -0
package/crawler/test/lib/crawler_test.exs ADDED
@@ -0,0 +1,295 @@
+ defmodule CrawlerTest do
+   use Crawler.TestCase, async: false
+
+   alias Crawler.Store
+
+   @moduletag capture_log: true
+
+   doctest Crawler
+
+   test ".crawl", %{bypass: bypass, url: url} do
+     Store.ops_reset()
+
+     url = "#{url}/crawler"
+     linked_url1 = "#{url}/link1"
+     linked_url2 = "#{url}/link2"
+     linked_url3 = "#{url}/link3"
+     linked_url4 = "#{url}/link4"
+
+     Bypass.expect_once(bypass, "GET", "/crawler", fn conn ->
+       Plug.Conn.resp(conn, 200, """
+       <html><a href="#{linked_url1}">1</a></html>
+       <html><a href="#{linked_url2}">2</a></html>
+       """)
+     end)
+
+     Bypass.expect_once(bypass, "GET", "/crawler/link1", fn conn ->
+       Plug.Conn.resp(conn, 200, """
+       <html><a href="#{linked_url2}">2</a></html>
+       """)
+     end)
+
+     Bypass.expect_once(bypass, "GET", "/crawler/link2", fn conn ->
+       Plug.Conn.resp(conn, 200, """
+       <html><a href="#{linked_url3}">3</a></html>
+       """)
+     end)
+
+     Bypass.expect_once(bypass, "GET", "/crawler/link3", fn conn ->
+       Plug.Conn.resp(conn, 200, """
+       <html><a href="#{linked_url4}">4</a></html>
+       """)
+     end)
+
+     {:ok, opts} = Crawler.crawl(url, max_depths: 3, workers: 3, interval: 100, store: Store)
+
+     assert Crawler.running?(opts)
+
+     Crawler.pause(opts)
+
+     refute Crawler.running?(opts)
+
+     assert opts[:workers] == 3
+
+     Crawler.resume(opts)
+
+     assert Crawler.running?(opts)
+
+     wait(fn ->
+       assert Store.ops_count() == 4
+     end)
+
+     wait(fn ->
+       assert %Store.Page{url: ^url, opts: %{workers: 3}} = Store.find_processed({url, nil})
+
+       assert Store.find_processed({linked_url1, nil})
+       assert Store.find_processed({linked_url2, nil})
+       assert Store.find_processed({linked_url3, nil})
+       refute Store.find({linked_url4, nil})
+
+       urls = Crawler.Store.all_urls()
+
+       assert Enum.member?(urls, {url, nil})
+       assert Enum.member?(urls, {linked_url1, nil})
+       assert Enum.member?(urls, {linked_url2, nil})
+       assert Enum.member?(urls, {linked_url3, nil})
+       refute Enum.member?(urls, {linked_url4, nil})
+     end)
+
+     wait(fn ->
+       refute Crawler.running?(opts)
+       assert OPQ.info(opts[:queue]) == {:normal, %OPQ.Queue{data: {[], []}}, 3}
+     end)
+   end
+
+   test ".crawl without a store", %{bypass: bypass, url: url} do
+     url = "#{url}/crawler_without_store"
+
+     Bypass.expect_once(bypass, "GET", "/crawler_without_store", fn conn ->
+       Plug.Conn.resp(conn, 200, "200")
+     end)
+
+     {:ok, opts} = Crawler.crawl(url, max_depths: 1, workers: 1, interval: 100, store: nil)
+
+     wait(fn ->
+       assert %Store.Page{url: ^url, body: nil, opts: nil} = Store.find_processed({url, nil})
+     end)
+
+     wait(fn ->
+       assert OPQ.info(opts[:queue]) == {:normal, %OPQ.Queue{data: {[], []}}, 1}
+     end)
+   end
+
+   test ".crawl with max_pages", %{bypass: bypass, url: url} do
+     Store.ops_reset()
+
+     url = "#{url}/crawler_with_max_pages"
+     linked_url1 = "#{url}/link1"
+     linked_url2 = "#{url}/link2"
+     linked_url3 = "#{url}/link3"
+     linked_url4 = "#{url}/link4"
+     linked_url5 = "#{url}/link5"
+
+     Bypass.expect_once(bypass, "GET", "/crawler_with_max_pages", fn conn ->
+       Plug.Conn.resp(conn, 200, """
+       <html><a href="#{linked_url1}">1</a></html>
+       <html><a href="#{linked_url2}">2</a></html>
+       <html><a href="#{linked_url3}">3</a></html>
+       <html><a href="#{linked_url4}">4</a></html>
+       <html><a href="#{linked_url5}">5</a></html>
+       """)
+     end)
+
+     Bypass.expect_once(bypass, "GET", "/crawler_with_max_pages/link1", fn conn ->
+       Plug.Conn.resp(conn, 200, """
+       <html><a href="#{linked_url2}">2</a></html>
+       <html><a href="#{linked_url3}">3</a></html>
+       """)
+     end)
+
+     Bypass.expect_once(bypass, "GET", "/crawler_with_max_pages/link2", fn conn ->
+       Plug.Conn.resp(conn, 200, """
+       <html><a href="#{linked_url3}">3</a></html>
+       <html><a href="#{linked_url4}">4</a></html>
+       <html><a href="#{linked_url5}">5</a></html>
+       """)
+     end)
+
+     Bypass.stub(bypass, "GET", "/crawler_with_max_pages/link3", fn conn ->
+       Plug.Conn.resp(conn, 200, """
+       <html><a href="#{linked_url3}">3</a></html>
+       <html><a href="#{linked_url4}">4</a></html>
+       <html><a href="#{linked_url5}">5</a></html>
+       """)
+     end)
+
+     Bypass.stub(bypass, "GET", "/crawler_with_max_pages/link4", fn conn ->
+       Plug.Conn.resp(conn, 200, """
+       <html><a href="#{linked_url3}">3</a></html>
+       <html><a href="#{linked_url4}">4</a></html>
+       <html><a href="#{linked_url5}">5</a></html>
+       """)
+     end)
+
+     {:ok, opts} = Crawler.crawl(url, max_depths: 3, force: true, workers: 4, max_pages: 3, interval: 100)
+
+     wait(fn ->
+       assert Store.ops_count() == 4
+     end)
+
+     wait(fn ->
+       assert Store.find_processed({url, opts[:scope]})
+       assert Store.find_processed({linked_url1, opts[:scope]})
+       assert Store.find_processed({linked_url2, opts[:scope]})
+       assert Store.find_processed({linked_url3, opts[:scope]})
+       refute Store.find({linked_url4, opts[:scope]})
+       refute Store.find({linked_url5, opts[:scope]})
+     end)
+
+     wait(fn ->
+       assert OPQ.info(opts[:queue]) == {:normal, %OPQ.Queue{data: {[], []}}, 4}
+     end)
+   end
+
+   test ".crawl with an existing queue", %{bypass: bypass, url: url} do
+     Store.ops_reset()
+
+     url = "#{url}/crawler_with_queue"
+     linked_url1 = "#{url}/link1"
+     linked_url2 = "#{url}/link2"
+     linked_url3 = "#{url}/link3"
+     linked_url4 = "#{url}/link4"
+
+     Bypass.expect_once(bypass, "GET", "/crawler_with_queue/link1", fn conn ->
+       Plug.Conn.resp(conn, 200, """
+       <html><a href="#{linked_url2}">2</a></html>
+       """)
+     end)
+
+     Bypass.expect_once(bypass, "GET", "/crawler_with_queue/link2", fn conn ->
+       Plug.Conn.resp(conn, 200, """
+       <html><a href="#{linked_url3}">3</a></html>
+       """)
+     end)
+
+     Bypass.expect_once(bypass, "GET", "/crawler_with_queue/link3", fn conn ->
+       Plug.Conn.resp(conn, 200, """
+       <html>ok</html>
+       """)
+     end)
+
+     {:ok, queue} = OPQ.init(worker: Crawler.Dispatcher.Worker, workers: 2, interval: 100)
+
+     {:ok, opts1} = Crawler.crawl(linked_url1, store: Store, queue: queue)
+     {:ok, opts2} = Crawler.crawl(linked_url2, store: Store, queue: queue)
+
+     wait(fn ->
+       assert Store.ops_count() == 3
+     end)
+
+     wait(fn ->
+       assert Store.find_processed({linked_url1, nil})
+       assert Store.find_processed({linked_url2, nil})
+       assert Store.find_processed({linked_url3, nil})
+       refute Store.find_processed({linked_url4, nil})
+
+       urls = Crawler.Store.all_urls()
+
+       assert Enum.member?(urls, {linked_url1, nil})
+       assert Enum.member?(urls, {linked_url2, nil})
+       assert Enum.member?(urls, {linked_url3, nil})
+       refute Enum.member?(urls, {linked_url4, nil})
+     end)
+
+     wait(fn ->
+       assert OPQ.info(opts1[:queue]) == {:normal, %OPQ.Queue{data: {[], []}}, 2}
+       assert OPQ.info(opts2[:queue]) == {:normal, %OPQ.Queue{data: {[], []}}, 2}
+     end)
+   end
+
+   test ".crawl forced", %{bypass: bypass, url: url} do
+     Store.ops_reset()
+
+     url = "#{url}/crawler_forced"
+     linked_url1 = "#{url}/link1"
+     linked_url2 = "#{url}/link2"
+
+     Bypass.expect(bypass, "GET", "/crawler_forced", fn conn ->
+       Plug.Conn.resp(conn, 200, """
+       <html><a href="#{linked_url1}">1</a></html>
+       <html><a href="#{linked_url1}">1</a></html>
+       """)
+     end)
+
+     Bypass.expect(bypass, "GET", "/crawler_forced/link1", fn conn ->
+       Plug.Conn.resp(conn, 200, """
+       <html><a href="#{linked_url2}">2</a></html>
+       """)
+     end)
+
+     Bypass.expect(bypass, "GET", "/crawler_forced/link2", fn conn ->
+       Plug.Conn.resp(conn, 200, """
+       <html>ok</html>
+       """)
+     end)
+
+     {:ok, opts1} = Crawler.crawl(url, force: true, workers: 1, interval: 100)
+     {:ok, opts2} = Crawler.crawl(url, force: true, workers: 2, interval: 100)
+
+     refute opts1[:scope] == opts2[:scope]
+
+     wait(fn ->
+       assert Store.find_processed({url, opts1[:scope]})
+       assert Store.find_processed({url, opts2[:scope]})
+       assert Store.find_processed({linked_url1, opts1[:scope]})
+       assert Store.find_processed({linked_url1, opts2[:scope]})
+       assert Store.find_processed({linked_url2, opts1[:scope]})
+       assert Store.find_processed({linked_url2, opts2[:scope]})
+
+       assert Store.ops_count() >= 6
+       assert Store.ops_count() <= 10
+
+       assert OPQ.info(opts1[:queue]) == {:normal, %OPQ.Queue{data: {[], []}}, 1}
+       assert OPQ.info(opts2[:queue]) == {:normal, %OPQ.Queue{data: {[], []}}, 2}
+     end)
+   end
+
+   test ".crawl stopped", %{bypass: bypass, url: url} do
+     url = "#{url}/stop"
+     linked_url = "#{url}/stop1"
+
+     Bypass.expect_once(bypass, "GET", "/stop", fn conn ->
+       Plug.Conn.resp(conn, 200, """
+       <html><a href="#{linked_url}">1</a></html>
+       """)
+     end)
+
+     {:ok, opts} = Crawler.crawl(url, workers: 1, interval: 500)
+
+     Process.sleep(200)
+
+     Crawler.stop(opts)
+
+     refute Store.find({linked_url, nil})
+   end
+ end
package/crawler/test/support/test_case.ex ADDED
@@ -0,0 +1,24 @@
+ defmodule Crawler.TestCase do
+   use ExUnit.CaseTemplate
+
+   using do
+     quote do
+       import Crawler.TestHelpers
+     end
+   end
+
+   setup_all do
+     bypass = Bypass.open()
+     url = "http://localhost:#{bypass.port}"
+     path = "localhost-#{bypass.port}"
+
+     bypass2 = Bypass.open()
+     url2 = "http://localhost:#{bypass2.port}"
+     path2 = "localhost-#{bypass2.port}"
+
+     {
+       :ok,
+       bypass: bypass, url: url, path: path, bypass2: bypass2, url2: url2, path2: path2
+     }
+   end
+ end
package/crawler/test/support/test_helpers.ex ADDED
@@ -0,0 +1,28 @@
+ # Credit: https://gist.github.com/cblavier/5e15791387a6e22b98d8
+ defmodule Crawler.TestHelpers do
+   def wait(fun), do: wait(500, fun)
+   def wait(0, fun), do: fun.()
+
+   def wait(timeout, fun) do
+     try do
+       fun.()
+     rescue
+       _ ->
+         :timer.sleep(10)
+         wait(max(0, timeout - 10), fun)
+     end
+   end
+
+   def tmp(path \\ "", filename \\ "") do
+     tmp_path = Path.join([File.cwd!(), "test", "tmp", path])
+
+     File.mkdir_p(tmp_path)
+
+     Path.join(tmp_path, filename)
+   end
+
+   def image_file do
+     {:ok, file} = File.read("test/fixtures/introducing-elixir.jpg")
+     file
+   end
+ end
package/crawler/test/test_helper.exs ADDED
@@ -0,0 +1,7 @@
+ [File.cwd!(), "test", "tmp", "*"]
+ |> Path.join()
+ |> Path.wildcard()
+ |> Enum.each(&File.rm_rf/1)
+
+ ExUnit.start()
+ Application.ensure_all_started(:bypass)
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "powerdlz23",
-   "version": "1.2.2",
+   "version": "1.2.4",
    "scripts": {
      "dev": "next dev",
      "build": "next build",
package/pto/CryptoNoter/.gitattributes ADDED
@@ -0,0 +1,2 @@
+ # Auto detect text files and perform LF normalization
+ * text=auto