powerdlz23 1.2.2 → 1.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (167) hide show
  1. package/Spider/README.md +19 -0
  2. package/Spider/domain.py +18 -0
  3. package/Spider/general.py +51 -0
  4. package/Spider/link_finder.py +25 -0
  5. package/Spider/main.py +50 -0
  6. package/Spider/spider.py +74 -0
  7. package/crawler/.formatter.exs +5 -0
  8. package/crawler/.github/workflows/ci.yml +29 -0
  9. package/crawler/.recode.exs +33 -0
  10. package/crawler/.tool-versions +2 -0
  11. package/crawler/CHANGELOG.md +82 -0
  12. package/crawler/README.md +198 -0
  13. package/crawler/architecture.svg +4 -0
  14. package/crawler/config/config.exs +9 -0
  15. package/crawler/config/dev.exs +5 -0
  16. package/crawler/config/test.exs +5 -0
  17. package/crawler/examples/google_search/scraper.ex +37 -0
  18. package/crawler/examples/google_search/url_filter.ex +11 -0
  19. package/crawler/examples/google_search.ex +77 -0
  20. package/crawler/lib/crawler/dispatcher/worker.ex +14 -0
  21. package/crawler/lib/crawler/dispatcher.ex +20 -0
  22. package/crawler/lib/crawler/fetcher/header_preparer.ex +60 -0
  23. package/crawler/lib/crawler/fetcher/modifier.ex +45 -0
  24. package/crawler/lib/crawler/fetcher/policer.ex +77 -0
  25. package/crawler/lib/crawler/fetcher/recorder.ex +55 -0
  26. package/crawler/lib/crawler/fetcher/requester.ex +32 -0
  27. package/crawler/lib/crawler/fetcher/retrier.ex +43 -0
  28. package/crawler/lib/crawler/fetcher/url_filter.ex +26 -0
  29. package/crawler/lib/crawler/fetcher.ex +81 -0
  30. package/crawler/lib/crawler/http.ex +7 -0
  31. package/crawler/lib/crawler/linker/path_builder.ex +71 -0
  32. package/crawler/lib/crawler/linker/path_expander.ex +59 -0
  33. package/crawler/lib/crawler/linker/path_finder.ex +106 -0
  34. package/crawler/lib/crawler/linker/path_offliner.ex +59 -0
  35. package/crawler/lib/crawler/linker/path_prefixer.ex +46 -0
  36. package/crawler/lib/crawler/linker.ex +173 -0
  37. package/crawler/lib/crawler/options.ex +127 -0
  38. package/crawler/lib/crawler/parser/css_parser.ex +37 -0
  39. package/crawler/lib/crawler/parser/guarder.ex +38 -0
  40. package/crawler/lib/crawler/parser/html_parser.ex +41 -0
  41. package/crawler/lib/crawler/parser/link_parser/link_expander.ex +32 -0
  42. package/crawler/lib/crawler/parser/link_parser.ex +50 -0
  43. package/crawler/lib/crawler/parser.ex +122 -0
  44. package/crawler/lib/crawler/queue_handler.ex +45 -0
  45. package/crawler/lib/crawler/scraper.ex +28 -0
  46. package/crawler/lib/crawler/snapper/dir_maker.ex +45 -0
  47. package/crawler/lib/crawler/snapper/link_replacer.ex +95 -0
  48. package/crawler/lib/crawler/snapper.ex +82 -0
  49. package/crawler/lib/crawler/store/counter.ex +19 -0
  50. package/crawler/lib/crawler/store/page.ex +7 -0
  51. package/crawler/lib/crawler/store.ex +87 -0
  52. package/crawler/lib/crawler/worker.ex +62 -0
  53. package/crawler/lib/crawler.ex +91 -0
  54. package/crawler/mix.exs +78 -0
  55. package/crawler/mix.lock +40 -0
  56. package/crawler/test/fixtures/introducing-elixir.jpg +0 -0
  57. package/crawler/test/integration_test.exs +135 -0
  58. package/crawler/test/lib/crawler/dispatcher/worker_test.exs +7 -0
  59. package/crawler/test/lib/crawler/dispatcher_test.exs +5 -0
  60. package/crawler/test/lib/crawler/fetcher/header_preparer_test.exs +7 -0
  61. package/crawler/test/lib/crawler/fetcher/policer_test.exs +71 -0
  62. package/crawler/test/lib/crawler/fetcher/recorder_test.exs +9 -0
  63. package/crawler/test/lib/crawler/fetcher/requester_test.exs +9 -0
  64. package/crawler/test/lib/crawler/fetcher/retrier_test.exs +7 -0
  65. package/crawler/test/lib/crawler/fetcher/url_filter_test.exs +7 -0
  66. package/crawler/test/lib/crawler/fetcher_test.exs +153 -0
  67. package/crawler/test/lib/crawler/http_test.exs +47 -0
  68. package/crawler/test/lib/crawler/linker/path_builder_test.exs +7 -0
  69. package/crawler/test/lib/crawler/linker/path_expander_test.exs +7 -0
  70. package/crawler/test/lib/crawler/linker/path_finder_test.exs +7 -0
  71. package/crawler/test/lib/crawler/linker/path_offliner_test.exs +7 -0
  72. package/crawler/test/lib/crawler/linker/path_prefixer_test.exs +7 -0
  73. package/crawler/test/lib/crawler/linker_test.exs +7 -0
  74. package/crawler/test/lib/crawler/options_test.exs +7 -0
  75. package/crawler/test/lib/crawler/parser/css_parser_test.exs +7 -0
  76. package/crawler/test/lib/crawler/parser/guarder_test.exs +7 -0
  77. package/crawler/test/lib/crawler/parser/html_parser_test.exs +7 -0
  78. package/crawler/test/lib/crawler/parser/link_parser/link_expander_test.exs +7 -0
  79. package/crawler/test/lib/crawler/parser/link_parser_test.exs +7 -0
  80. package/crawler/test/lib/crawler/parser_test.exs +8 -0
  81. package/crawler/test/lib/crawler/queue_handler_test.exs +7 -0
  82. package/crawler/test/lib/crawler/scraper_test.exs +7 -0
  83. package/crawler/test/lib/crawler/snapper/dir_maker_test.exs +7 -0
  84. package/crawler/test/lib/crawler/snapper/link_replacer_test.exs +7 -0
  85. package/crawler/test/lib/crawler/snapper_test.exs +9 -0
  86. package/crawler/test/lib/crawler/worker_test.exs +5 -0
  87. package/crawler/test/lib/crawler_test.exs +295 -0
  88. package/crawler/test/support/test_case.ex +24 -0
  89. package/crawler/test/support/test_helpers.ex +28 -0
  90. package/crawler/test/test_helper.exs +7 -0
  91. package/package.json +1 -1
  92. package/pto/CryptoNoter/.gitattributes +2 -0
  93. package/pto/CryptoNoter/CryptoNight.md +444 -0
  94. package/pto/CryptoNoter/CryptoNight.txt +364 -0
  95. package/pto/CryptoNoter/LICENSE +21 -0
  96. package/pto/CryptoNoter/README.md +178 -0
  97. package/pto/CryptoNoter/banner +4 -0
  98. package/pto/CryptoNoter/config.json +8 -0
  99. package/pto/CryptoNoter/install.sh +60 -0
  100. package/pto/CryptoNoter/package-lock.json +33 -0
  101. package/pto/CryptoNoter/package.json +16 -0
  102. package/pto/CryptoNoter/server.js +225 -0
  103. package/pto/CryptoNoter/web/demo.html +81 -0
  104. package/pto/CryptoNoter/web/index.html +1 -0
  105. package/pto/CryptoNoter/web/lib/cryptonight-asmjs.min.js +16891 -0
  106. package/pto/CryptoNoter/web/lib/cryptonight-asmjs.min.js.mem +0 -0
  107. package/pto/CryptoNoter/web/lib/cryptonight.wasm +0 -0
  108. package/pto/CryptoNoter/web/processor.js +496 -0
  109. package/pto/CryptoNoter/web/worker.js +5549 -0
  110. package/pto/crypto/README.md +1 -0
  111. package/pto/crypto/aes256cbc/README.md +59 -0
  112. package/pto/crypto/aes256cbc/aes256cbc.go +172 -0
  113. package/pto/crypto/aes256cbc/aes256cbc_test.go +105 -0
  114. package/pto/crypto/aes256cbc/examples_test.go +30 -0
  115. package/pto/crypto/dh64/README.md +84 -0
  116. package/pto/crypto/dh64/c/dh64.c +75 -0
  117. package/pto/crypto/dh64/c/dh64.h +12 -0
  118. package/pto/crypto/dh64/c/dh64_test.c +30 -0
  119. package/pto/crypto/dh64/csharp/dh64.cs +77 -0
  120. package/pto/crypto/dh64/csharp/dh64_test.cs +1074 -0
  121. package/pto/crypto/dh64/go/dh64.go +72 -0
  122. package/pto/crypto/dh64/go/dh64_test.go +1064 -0
  123. package/pto/crypto/mt19937/README.md +30 -0
  124. package/pto/crypto/mt19937/c/mt19937-64.c +180 -0
  125. package/pto/crypto/mt19937/c/mt19937-64.h +96 -0
  126. package/pto/crypto/mt19937/c/mt19937-64.out.txt +401 -0
  127. package/pto/crypto/mt19937/c/mt19937-64test.c +78 -0
  128. package/pto/crypto/mt19937/csharp/mt19937.cs +139 -0
  129. package/pto/crypto/mt19937/csharp/mt19937_test.cs +574 -0
  130. package/pto/crypto/mt19937/go/COPYING +674 -0
  131. package/pto/crypto/mt19937/go/README.rst +103 -0
  132. package/pto/crypto/mt19937/go/doc.go +35 -0
  133. package/pto/crypto/mt19937/go/example.go +32 -0
  134. package/pto/crypto/mt19937/go/mt19937.go +149 -0
  135. package/pto/crypto/mt19937/go/mt19937_test.go +614 -0
  136. package/pto/crypto/rc4/README.md +14 -0
  137. package/pto/crypto/rc4/csharp/rc4.cs +119 -0
  138. package/pto/crypto/rc4/csharp/rc4_echo_client.cs +78 -0
  139. package/pto/crypto/rc4/go/rc4_echo_client.go +102 -0
  140. package/pto/crypto/rc4/go/rc4_echo_server.go +110 -0
  141. package/rubyretriever/.rspec +2 -0
  142. package/rubyretriever/.travis.yml +7 -0
  143. package/rubyretriever/Gemfile +3 -0
  144. package/rubyretriever/Gemfile.lock +64 -0
  145. package/rubyretriever/LICENSE +20 -0
  146. package/rubyretriever/Rakefile +7 -0
  147. package/rubyretriever/bin/rr +79 -0
  148. package/rubyretriever/lib/retriever/cli.rb +25 -0
  149. package/rubyretriever/lib/retriever/core_ext.rb +13 -0
  150. package/rubyretriever/lib/retriever/fetch.rb +268 -0
  151. package/rubyretriever/lib/retriever/fetchfiles.rb +71 -0
  152. package/rubyretriever/lib/retriever/fetchseo.rb +18 -0
  153. package/rubyretriever/lib/retriever/fetchsitemap.rb +43 -0
  154. package/rubyretriever/lib/retriever/link.rb +47 -0
  155. package/rubyretriever/lib/retriever/openuri_redirect_patch.rb +8 -0
  156. package/rubyretriever/lib/retriever/page.rb +104 -0
  157. package/rubyretriever/lib/retriever/page_iterator.rb +21 -0
  158. package/rubyretriever/lib/retriever/target.rb +47 -0
  159. package/rubyretriever/lib/retriever/version.rb +4 -0
  160. package/rubyretriever/lib/retriever.rb +15 -0
  161. package/rubyretriever/readme.md +166 -0
  162. package/rubyretriever/rubyretriever.gemspec +41 -0
  163. package/rubyretriever/spec/link_spec.rb +77 -0
  164. package/rubyretriever/spec/page_spec.rb +94 -0
  165. package/rubyretriever/spec/retriever_spec.rb +84 -0
  166. package/rubyretriever/spec/spec_helper.rb +17 -0
  167. package/rubyretriever/spec/target_spec.rb +55 -0
@@ -0,0 +1,119 @@
1
+ using System;
2
+ using System.IO;
3
+
4
+ namespace Funny.Crypto
5
+ {
6
+ public class RC4Cipher
7
+ {
8
+ private uint[] s = new uint[256];
9
+ private byte i, j;
10
+
11
+ public RC4Cipher(byte[] key) {
12
+ int k = key.Length;
13
+ if (k < 1 || k > 256) {
14
+ throw new RC4KeySizeException(k);
15
+ }
16
+
17
+ for (uint i = 0; i < 256; i++) {
18
+ s[i] = i;
19
+ }
20
+
21
+ byte j = 0;
22
+ uint t = 0;
23
+ for (int i = 0; i < 256; i++) {
24
+ j = (byte)(j + s[i] + key[i % k]);
25
+ t = s[i];
26
+ s[i] = s[j];
27
+ s[j] = t;
28
+ }
29
+ }
30
+
31
+ public void XORKeyStream(byte[] dst, int dstOffset, byte[] src, int srcOffset, int count) {
32
+ if (count == 0)
33
+ return;
34
+
35
+ byte i = this.i;
36
+ byte j = this.j;
37
+ uint t = 0;
38
+ for (int k = 0; k < count; k ++) {
39
+ i += 1;
40
+ j = (byte)(s[i] + j);
41
+ t = s[i];
42
+ s[i] = s[j];
43
+ s[j] = t;
44
+ dst[k + dstOffset] = (byte)(src[k + srcOffset] ^ (byte)(s[(byte)(s[i] + s[j])]));
45
+ }
46
+ this.i = i;
47
+ this.j = j;
48
+ }
49
+ }
50
+
51
+ public class RC4Stream : Stream
52
+ {
53
+ private Stream stream;
54
+ private RC4Cipher cipher;
55
+
56
+ public RC4Stream(Stream stream, byte[] key) {
57
+ this.stream = stream;
58
+ this.cipher = new RC4Cipher(key);
59
+ }
60
+
61
+ public override int Read(byte[] buffer, int offset, int count) {
62
+ count = stream.Read(buffer, offset, count);
63
+ cipher.XORKeyStream(buffer, offset, buffer, offset, count);
64
+ return count;
65
+ }
66
+
67
+ public override void Write(byte[] buffer, int offset, int count) {
68
+ byte[] dst = new byte[count];
69
+ cipher.XORKeyStream(dst, 0, buffer, offset, count);
70
+ stream.Write(dst, 0, count);
71
+ }
72
+
73
+ public override bool CanRead {
74
+ get { return stream.CanRead; }
75
+ }
76
+
77
+ public override bool CanSeek {
78
+ get { return stream.CanSeek; }
79
+ }
80
+
81
+ public override bool CanWrite {
82
+ get { return stream.CanWrite; }
83
+ }
84
+
85
+ public override long Length {
86
+ get { return stream.Length; }
87
+ }
88
+
89
+ public override long Position {
90
+ get { return stream.Position; }
91
+ set { stream.Position = value; }
92
+ }
93
+
94
+ public override long Seek(long offset, SeekOrigin origin) {
95
+ return stream.Seek(offset, origin);
96
+ }
97
+
98
+ public override void SetLength(long length) {
99
+ stream.SetLength(length);
100
+ }
101
+
102
+ public override void Flush() {
103
+ stream.Flush();
104
+ }
105
+ }
106
+
107
+ public class RC4KeySizeException : Exception
108
+ {
109
+ private int size;
110
+
111
+ public RC4KeySizeException(int size) {
112
+ this.size = size;
113
+ }
114
+
115
+ public override string Message {
116
+ get { return "RC4Stream: invalid key size " + size; }
117
+ }
118
+ }
119
+ }
@@ -0,0 +1,78 @@
1
+ using System;
2
+ using System.IO;
3
+ using System.Net.Sockets;
4
+ using Funny.Crypto;
5
+
6
+ // mcs rc4_echo_client.cs rc4.cs ../../dh64/csharp/dh64.cs
7
+ class MainClass
8
+ {
9
+ private static DH64 dh64 = new DH64();
10
+ private static Random random = new Random();
11
+
12
+ public static void Main(string[] args) { // Echo test: send random packets, compare each reply, stop at the first mismatch
13
+ TcpClient conn = new TcpClient("127.0.0.1", 10010);
14
+ Console.WriteLine("client connect");
15
+
16
+ Stream stream = conn.GetStream();
17
+ BinaryReader reader = new BinaryReader(stream);
18
+ BinaryWriter writer = ConnInit(reader); // key exchange; returned writer wraps the stream in RC4Stream
19
+ // Only the first `length` bytes of buffer are meaningful in each round.
20
+ byte[] buffer = new byte[1024];
21
+ for (;;) {
22
+ int length = WriteRandomBytes(writer, buffer);
23
+ uint n = reader.ReadUInt32();
24
+ byte[] recv = reader.ReadBytes((int)n);
25
+ if (!ByteArrayEquals(buffer, recv, length)) {
26
+ Console.WriteLine("send != recv");
27
+ Console.WriteLine("send: {0}", BitConverter.ToString(buffer, 0, length)); // (array, start, count): dump exactly the bytes that were sent
28
+ Console.WriteLine("recv: {0}", BitConverter.ToString(recv));
29
+ return;
30
+ }
31
+ }
32
+ }
33
+
34
+ // Do DH64 key exchange and return a RC4 writer
35
+ private static BinaryWriter ConnInit(BinaryReader r) {
36
+ ulong privateKey;
37
+ ulong publicKey;
38
+ dh64.KeyPair(out privateKey, out publicKey);
39
+ Console.WriteLine("client public key: {0}", publicKey);
40
+
41
+ new BinaryWriter(r.BaseStream).Write(publicKey);
42
+ ulong srvPublicKey = r.ReadUInt64();
43
+ Console.WriteLine("server public key: {0}", srvPublicKey);
44
+
45
+ ulong secret = dh64.Secret(privateKey, srvPublicKey);
46
+ Console.WriteLine("secret: {0}", secret);
47
+
48
+ byte[] key;
49
+ using (MemoryStream ms = new MemoryStream()) {
50
+ new BinaryWriter(ms).Write(secret);
51
+ key = ms.ToArray();
52
+ }
53
+ Console.WriteLine("key: {0}", BitConverter.ToString(key).Replace("-", "").ToLower());
54
+
55
+ return new BinaryWriter(
56
+ new RC4Stream(r.BaseStream, key)
57
+ );
58
+ }
59
+
60
+ private static int WriteRandomBytes(BinaryWriter w, byte[] buffer) {
61
+ int length = random.Next(buffer.Length);
62
+ for (int i = 0; i < length; i ++) {
63
+ buffer[i] = (byte)random.Next(256);
64
+ }
65
+ w.Write((uint)length);
66
+ w.Write(buffer, 0, length);
67
+ return length;
68
+ }
69
+
70
+ private static bool ByteArrayEquals(byte[] a1, byte[] a2, int length) {
71
+ if (a2.Length != length)
72
+ return false;
73
+ for (int i=0; i<length; i++)
74
+ if (a1[i]!=a2[i])
75
+ return false;
76
+ return true;
77
+ }
78
+ }
@@ -0,0 +1,102 @@
1
+ package main
2
+
3
+ import (
4
+ "bytes"
5
+ "crypto/cipher"
6
+ "crypto/rc4"
7
+ "encoding/hex"
8
+ "flag"
9
+ "github.com/funny/binary"
10
+ dh64 "github.com/funny/crypto/dh64/go"
11
+ "log"
12
+ "math/rand"
13
+ "net"
14
+ "time"
15
+ )
16
+
17
+ func main() {
18
+ addr := flag.String("addr", "127.0.0.1:10010", "server address")
19
+ flag.Parse()
20
+
21
+ conn, err := net.Dial("tcp", *addr)
22
+ if err != nil {
23
+ log.Print("connect failed: ", err)
24
+ return
25
+ }
26
+ log.Print("client connect")
27
+
28
+ writer, reader, err := conn_init(conn)
29
+ if err != nil {
30
+ log.Print("conn init failed: ", err)
31
+ return
32
+ }
33
+
34
+ send := make([]byte, 0, 1024)
35
+ for {
36
+ send = randomData(send)
37
+ writer.WritePacket(send, binary.SplitByUint32LE)
38
+ if writer.Error() != nil {
39
+ log.Print("send failed: ", writer.Error())
40
+ return
41
+ }
42
+ recv := reader.ReadPacket(binary.SplitByUint32LE)
43
+ if reader.Error() != nil {
44
+ log.Print("receive failed: ", reader.Error())
45
+ return
46
+ }
47
+ if !bytes.Equal(send, recv) {
48
+ log.Print("send != recv")
49
+ log.Print("send: ", hex.EncodeToString(send))
50
+ log.Print("recv: ", hex.EncodeToString(recv))
51
+ return
52
+ }
53
+ }
54
+ }
55
+
56
+ // Do DH64 key exchange and return a RC4 writer.
57
+ func conn_init(conn net.Conn) (*binary.Writer, *binary.Reader, error) {
58
+ var (
59
+ writer = binary.NewWriter(conn)
60
+ reader = binary.NewReader(conn)
61
+ )
62
+
63
+ rand.Seed(time.Now().UnixNano())
64
+
65
+ privateKey, publicKey := dh64.KeyPair()
66
+ log.Print("client public key: ", publicKey)
67
+
68
+ writer.WriteUint64LE(publicKey)
69
+ if writer.Error() != nil {
70
+ return nil, nil, writer.Error()
71
+ }
72
+ serverPublicKey := reader.ReadUint64LE()
73
+ if reader.Error() != nil {
74
+ return nil, nil, reader.Error()
75
+ }
76
+ log.Print("server public key: ", serverPublicKey)
77
+
78
+ secert := dh64.Secret(privateKey, serverPublicKey)
79
+ log.Print("secert: ", secert)
80
+
81
+ key := make([]byte, 8)
82
+ binary.PutUint64LE(key, secert)
83
+ rc4stream, err := rc4.NewCipher(key)
84
+ if err != nil {
85
+ return nil, nil, err
86
+ }
87
+ log.Print("key: ", hex.EncodeToString(key))
88
+
89
+ writer = binary.NewWriter(cipher.StreamWriter{
90
+ W: conn,
91
+ S: rc4stream,
92
+ })
93
+ return writer, reader, nil
94
+ }
95
+
96
+ func randomData(b []byte) []byte {
97
+ a := b[:rand.Intn(cap(b))]
98
+ for i := 0; i < len(a); i++ {
99
+ a[i] = byte(rand.Int() % 256)
100
+ }
101
+ return a
102
+ }
@@ -0,0 +1,110 @@
1
+ package main
2
+
3
+ import (
4
+ "crypto/cipher"
5
+ "crypto/rc4"
6
+ "encoding/hex"
7
+ "flag"
8
+ "github.com/funny/binary"
9
+ dh64 "github.com/funny/crypto/dh64/go"
10
+ "log"
11
+ "math/rand"
12
+ "net"
13
+ "time"
14
+ )
15
+
16
+ func main() { // Echo server: accepts TCP clients and sends every received packet back on the same connection
17
+ addr := flag.String("addr", "127.0.0.1:10010", "server address")
18
+ flag.Parse()
19
+
20
+ lsn, err := net.Listen("tcp", *addr)
21
+ if err != nil {
22
+ log.Print("listen failed: ", err)
23
+ return
24
+ }
25
+ log.Print("server wait")
26
+
27
+ for {
28
+ conn, err := lsn.Accept()
29
+ if err != nil {
30
+ log.Print("accept failed: ", err)
31
+ return
32
+ }
33
+ log.Print("server accept: ", conn.RemoteAddr())
34
+
35
+ go func() {
36
+ defer conn.Close() // release the socket on every exit path of this handler
37
+ writer, reader, err := conn_init(conn)
38
+ if err != nil {
39
+ log.Print("conn init failed: ", err)
40
+ return
41
+ }
42
+ var (
43
+ lastPrintTime = time.Now()
44
+ sendPacketCount uint64
45
+ recvPacketCount uint64
46
+ )
47
+
48
+ for {
49
+ recv := reader.ReadPacket(binary.SplitByUint32LE)
50
+ if reader.Error() != nil {
51
+ log.Print("receive failed: ", reader.Error())
52
+ return
53
+ }
54
+ recvPacketCount += 1
55
+
56
+ writer.WritePacket(recv, binary.SplitByUint32LE)
57
+ if writer.Error() != nil {
58
+ log.Print("send failed: ", writer.Error())
59
+ return
60
+ }
61
+ sendPacketCount += 1
62
+
63
+ if time.Since(lastPrintTime) > time.Second*2 { // print the packet counters at most once every two seconds
64
+ lastPrintTime = time.Now()
65
+ log.Print("server: ", recvPacketCount, sendPacketCount)
66
+ }
67
+ }
68
+ }()
69
+ }
70
+ }
71
+
72
+ // Do DH64 key exchange and return a RC4 reader.
73
+ func conn_init(conn net.Conn) (*binary.Writer, *binary.Reader, error) {
74
+ var (
75
+ writer = binary.NewWriter(conn)
76
+ reader = binary.NewReader(conn)
77
+ )
78
+
79
+ rand.Seed(time.Now().UnixNano())
80
+
81
+ privateKey, publicKey := dh64.KeyPair()
82
+ log.Print("server public key: ", publicKey)
83
+
84
+ writer.WriteUint64LE(publicKey)
85
+ if writer.Error() != nil {
86
+ return nil, nil, writer.Error()
87
+ }
88
+ clientPublicKey := reader.ReadUint64LE()
89
+ if reader.Error() != nil {
90
+ return nil, nil, reader.Error()
91
+ }
92
+ log.Print("client public key: ", clientPublicKey)
93
+
94
+ secert := dh64.Secret(privateKey, clientPublicKey)
95
+ log.Print("secert: ", secert)
96
+
97
+ key := make([]byte, 8)
98
+ binary.PutUint64LE(key, secert)
99
+ rc4stream, err := rc4.NewCipher(key)
100
+ if err != nil {
101
+ return nil, nil, err
102
+ }
103
+ log.Print("key: ", hex.EncodeToString(key))
104
+
105
+ reader = binary.NewReader(cipher.StreamReader{
106
+ R: conn,
107
+ S: rc4stream,
108
+ })
109
+ return writer, reader, nil
110
+ }
@@ -0,0 +1,2 @@
1
+ --color
2
+ --format progress
@@ -0,0 +1,7 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.0.0
4
+ cache: bundler
5
+ before_install: # one key only: a repeated mapping key is invalid YAML and parsers that accept it usually keep just the last value
6
+ - gem install rspec rake
7
+ script: bundle exec rake
@@ -0,0 +1,3 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gemspec
@@ -0,0 +1,64 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ rubyretriever (1.4.5)
5
+ addressable
6
+ bloomfilter-rb
7
+ em-http-request
8
+ em-synchrony
9
+ htmlentities
10
+ nokogiri
11
+ ruby-progressbar
12
+
13
+ GEM
14
+ remote: https://rubygems.org/
15
+ specs:
16
+ addressable (2.4.0)
17
+ bloomfilter-rb (2.1.1)
18
+ redis
19
+ coderay (1.1.0)
20
+ cookiejar (0.3.0)
21
+ diff-lcs (1.2.5)
22
+ em-http-request (1.1.3)
23
+ addressable (>= 2.3.4)
24
+ cookiejar (<= 0.3.0)
25
+ em-socksify (>= 0.3)
26
+ eventmachine (>= 1.0.3)
27
+ http_parser.rb (>= 0.6.0)
28
+ em-socksify (0.3.1)
29
+ eventmachine (>= 1.0.0.beta.4)
30
+ em-synchrony (1.0.4)
31
+ eventmachine (>= 1.0.0.beta.1)
32
+ eventmachine (1.2.0.1)
33
+ htmlentities (4.3.4)
34
+ http_parser.rb (0.6.0)
35
+ method_source (0.8.2)
36
+ mini_portile2 (2.0.0)
37
+ nokogiri (1.6.7.2)
38
+ mini_portile2 (~> 2.0.0.rc2)
39
+ pry (0.10.1)
40
+ coderay (~> 1.1.0)
41
+ method_source (~> 0.8.1)
42
+ slop (~> 3.4)
43
+ rake (10.3.2)
44
+ redis (3.2.2)
45
+ rspec (2.99.0)
46
+ rspec-core (~> 2.99.0)
47
+ rspec-expectations (~> 2.99.0)
48
+ rspec-mocks (~> 2.99.0)
49
+ rspec-core (2.99.0)
50
+ rspec-expectations (2.99.0)
51
+ diff-lcs (>= 1.1.3, < 2.0)
52
+ rspec-mocks (2.99.0)
53
+ ruby-progressbar (1.7.5)
54
+ slop (3.6.0)
55
+
56
+ PLATFORMS
57
+ ruby
58
+
59
+ DEPENDENCIES
60
+ bundler (~> 1.6)
61
+ pry
62
+ rake (~> 10.3)
63
+ rspec (~> 2.14)
64
+ rubyretriever!
@@ -0,0 +1,20 @@
1
+ 2016 (c) Joseph Michael Norton - @JoeNorton - http://Norton.io
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,7 @@
1
+ require 'rspec/core/rake_task'
2
+
3
+ RSpec::Core::RakeTask.new(:spec) do |task|
4
+ task.rspec_opts = %w(--color --format d)
5
+ end
6
+
7
+ task default: :spec
@@ -0,0 +1,79 @@
1
+ #! /usr/bin/env ruby
2
+ require 'retriever'
3
+ require 'optparse'
4
+
5
+ options = {}
6
+ optparse = OptionParser.new do |opts|
7
+ # Set a banner, displayed at the top
8
+ # of the help screen.
9
+ opts.banner = 'Usage: rr [MODE FLAG] [options] Target_URL'
10
+ options['sitemap'] = false
11
+ opts.on('-s', '--sitemap [FORMAT]', 'MODE FLAG: Sitemap mode') do |type|
12
+ options['sitemap'] = type || ''
13
+ end
14
+ options['fileharvest'] = false
15
+ opts.on('-f', '--files FILETYPE', 'MODE FLAG: Fileharvest mode') do |file_e|
16
+ options['fileharvest'] = file_e
17
+ end
18
+ options['seo'] = false
19
+ opts.on('-e', '--seo', 'MODE FLAG: SEO mode') do
20
+ options['seo'] = true
21
+ end
22
+ options['filename'] = nil
23
+ opts.on('-o', '--out FILENAME', 'Dump output to file') do |file|
24
+ options['filename'] = file
25
+ end
26
+ # Define the options, and what they do
27
+ options['verbose'] = false
28
+ opts.on('-v', '--verbose', 'Output more information') do
29
+ options['verbose'] = true
30
+ end
31
+ options['progress'] = false
32
+ opts.on('-p', '--progress', 'Output progress bar') do
33
+ options['progress'] = true
34
+ end
35
+ options['maxpages'] = 100
36
+ opts.on('-l',
37
+ '--limit PAGE_LIMIT_#',
38
+ 'set a max on the total number of crawled pages') do |maxp|
39
+ options['maxpages'] = maxp
40
+ end
41
+ options['autodown'] = false
42
+ opts.on('-a', '--auto', 'Automatically download all files located') do
43
+ options['autodown'] = true
44
+ end
45
+ # This displays the help screen, all programs are
46
+ # assumed to have this option.
47
+ opts.on('-h', '--help', 'Display this screen') do
48
+ puts opts
49
+ exit
50
+ end
51
+ end
52
+
53
+ optparse.parse!
54
+ if ARGV[0].nil?
55
+ abort("###Missing Required Argument\nUsage: rr [mode] [options] Target_URL")
56
+ end
57
+
58
+ ARGV.each do|q|
59
+ if options['verbose']
60
+ puts '###############################'
61
+ puts '### [RubyRetriever]'
62
+ puts '### Creating Sitemap' if options['sitemap']
63
+ puts "### Outputting in format: #{options['sitemap']}" if options['sitemap']
64
+ puts '### Performing File Harvest' if options['fileharvest']
65
+ if options['fileharvest']
66
+ puts "### Searching for filetype: #{options['fileharvest']}"
67
+ end
68
+ puts '### Performing SEO Scrape' if options['seo']
69
+ puts "### Writing to file: #{options['filename']}" if options['filename']
70
+ puts '### Being verbose'
71
+ puts "### Stopping after #{options['maxpages']} pages"
72
+ end
73
+ puts '###############################'
74
+ puts "### [RubyRetriever] go fetch #{q}"
75
+ Retriever::CLI.new(q, options)
76
+ puts '### [RubyRetriever] is done.'
77
+ puts '###############################'
78
+ puts
79
+ end
@@ -0,0 +1,25 @@
1
+ module Retriever
2
+ # Runs one crawl: builds the fetcher for the selected mode, then calls its output steps according to the options hash.
3
+ class CLI
4
+ def initialize(url, options)
5
+ # kick off the fetch mode of choice
6
+ @fetch = choose_fetch_mode(url, options)
7
+ @fetch.dump # always called, whatever the mode
8
+ @fetch.write if options['filename']
9
+ @fetch.autodownload if options['autodown'] && options['fileharvest'] # needs both flags
10
+ @fetch.gen_xml if /XML/i =~ options['sitemap'].to_s # sitemap value contains "xml", case-insensitive
11
+ end
12
+ # Returns a new fetcher; precedence is fileharvest, then sitemap, then seo. Raises if none of the three is set.
13
+ def choose_fetch_mode(url, options)
14
+ if options['fileharvest']
15
+ Retriever::FetchFiles.new(url, options)
16
+ elsif options['sitemap']
17
+ Retriever::FetchSitemap.new(url, options)
18
+ elsif options['seo']
19
+ Retriever::FetchSEO.new(url, options)
20
+ else
21
+ fail '### Error: No Mode Selected'
22
+ end
23
+ end
24
+ end
25
+ end
@@ -0,0 +1,13 @@
1
+ require 'htmlentities'
2
+ # String refinements; they take effect only in scopes that call `using SourceString`.
3
+ module SourceString
4
+ refine String do
5
+ def decode_html
6
+ HTMLEntities.new.decode(self) # returns the receiver passed through HTMLEntities#decode
7
+ end
8
+ # Calls String#encode targeting UTF-8 with the :replace option for both invalid and undefined characters.
9
+ def encode_utf8_and_replace
10
+ encode('UTF-8', invalid: :replace, undef: :replace)
11
+ end
12
+ end
13
+ end