wovnrb 1.1.0 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (120) hide show
  1. checksums.yaml +4 -4
  2. data/.circleci/config.yml +1 -0
  3. data/.gitignore +2 -0
  4. data/.rubocop.yml +1 -0
  5. data/.rubocop_todo.yml +308 -0
  6. data/Rakefile +13 -14
  7. data/lib/wovnrb.rb +43 -98
  8. data/lib/wovnrb/api_translator.rb +143 -0
  9. data/lib/wovnrb/headers.rb +78 -92
  10. data/lib/wovnrb/helpers/nokogumbo_helper.rb +1 -1
  11. data/lib/wovnrb/lang.rb +93 -125
  12. data/lib/wovnrb/railtie.rb +5 -7
  13. data/lib/wovnrb/services/glob.rb +3 -3
  14. data/lib/wovnrb/services/html_converter.rb +192 -0
  15. data/lib/wovnrb/services/html_replace_marker.rb +38 -0
  16. data/lib/wovnrb/services/wovn_logger.rb +8 -4
  17. data/lib/wovnrb/settings.rb +5 -3
  18. data/lib/wovnrb/store.rb +35 -26
  19. data/lib/wovnrb/text_caches/cache_base.rb +3 -2
  20. data/lib/wovnrb/text_caches/memory_cache.rb +2 -2
  21. data/lib/wovnrb/version.rb +1 -1
  22. data/test/fixtures/html/test.html +8 -0
  23. data/test/fixtures/html/test_translated.html +8 -0
  24. data/test/lib/api_translator_test.rb +109 -0
  25. data/test/lib/headers_test.rb +84 -55
  26. data/test/lib/lang_test.rb +157 -357
  27. data/test/lib/services/glob_test.rb +1 -1
  28. data/test/lib/services/html_converter_test.rb +166 -0
  29. data/test/lib/services/html_replace_marker_test.rb +75 -0
  30. data/test/lib/services/wovn_logger_test.rb +6 -6
  31. data/test/lib/store_test.rb +25 -69
  32. data/test/lib/text_caches/cache_base_test.rb +1 -1
  33. data/test/lib/text_caches/memory_cache_test.rb +10 -11
  34. data/test/lib/wovnrb_test.rb +77 -310
  35. data/test/test_helper.rb +22 -32
  36. data/wovnrb.gemspec +35 -44
  37. metadata +86 -205
  38. data/ext/dom/Makefile +0 -239
  39. data/lib/wovnrb/api_data.rb +0 -59
  40. data/lib/wovnrb/html_replacers/image_replacer.rb +0 -69
  41. data/lib/wovnrb/html_replacers/input_replacer.rb +0 -38
  42. data/lib/wovnrb/html_replacers/link_replacer.rb +0 -78
  43. data/lib/wovnrb/html_replacers/meta_replacer.rb +0 -28
  44. data/lib/wovnrb/html_replacers/replacer_base.rb +0 -49
  45. data/lib/wovnrb/html_replacers/script_replacer.rb +0 -39
  46. data/lib/wovnrb/html_replacers/text_replacer.rb +0 -21
  47. data/lib/wovnrb/html_replacers/unified_values/dst_swapping_targets_creator.rb +0 -76
  48. data/lib/wovnrb/html_replacers/unified_values/element_category.rb +0 -242
  49. data/lib/wovnrb/html_replacers/unified_values/node_swapping_targets_creator.rb +0 -134
  50. data/lib/wovnrb/html_replacers/unified_values/text_replacer.rb +0 -35
  51. data/lib/wovnrb/html_replacers/unified_values/text_scraper.rb +0 -152
  52. data/lib/wovnrb/html_replacers/unified_values/values_stack.rb +0 -65
  53. data/lib/wovnrb/services/url.rb +0 -12
  54. data/lib/wovnrb/services/value_agent.rb +0 -9
  55. data/test/fixtures/unified_values/site_html/simple_actual.html +0 -96
  56. data/test/fixtures/unified_values/site_html/simple_expected.json +0 -251
  57. data/test/fixtures/unified_values/site_html/wovn.io_actual.html +0 -686
  58. data/test/fixtures/unified_values/site_html/wovn.io_expected.json +0 -543
  59. data/test/fixtures/unified_values/site_html/www.yahoo.co.jp_actual.html +0 -1024
  60. data/test/fixtures/unified_values/site_html/www.yahoo.co.jp_expected.json +0 -3345
  61. data/test/fixtures/unified_values/small_html/block_inside_inline_actual.html +0 -12
  62. data/test/fixtures/unified_values/small_html/block_inside_inline_expected.json +0 -22
  63. data/test/fixtures/unified_values/small_html/br_tag_actual.html +0 -10
  64. data/test/fixtures/unified_values/small_html/br_tag_expected.json +0 -12
  65. data/test/fixtures/unified_values/small_html/comment_tag_actual.html +0 -12
  66. data/test/fixtures/unified_values/small_html/comment_tag_expected.json +0 -10
  67. data/test/fixtures/unified_values/small_html/complex_text_with_html_entity_actual.html +0 -7
  68. data/test/fixtures/unified_values/small_html/complex_text_with_html_entity_expected.json +0 -11
  69. data/test/fixtures/unified_values/small_html/deep_nested_block_actual.html +0 -14
  70. data/test/fixtures/unified_values/small_html/deep_nested_block_expected.json +0 -8
  71. data/test/fixtures/unified_values/small_html/deep_nested_inline_actual.html +0 -20
  72. data/test/fixtures/unified_values/small_html/deep_nested_inline_expected.json +0 -20
  73. data/test/fixtures/unified_values/small_html/empty_tag_actual.html +0 -10
  74. data/test/fixtures/unified_values/small_html/empty_tag_expected.json +0 -12
  75. data/test/fixtures/unified_values/small_html/empty_text_actual.html +0 -12
  76. data/test/fixtures/unified_values/small_html/empty_text_expected.json +0 -1
  77. data/test/fixtures/unified_values/small_html/ignore_tag_actual.html +0 -12
  78. data/test/fixtures/unified_values/small_html/ignore_tag_expected.json +0 -16
  79. data/test/fixtures/unified_values/small_html/ignored_class_actual.html +0 -10
  80. data/test/fixtures/unified_values/small_html/ignored_class_expected.json +0 -13
  81. data/test/fixtures/unified_values/small_html/img_actual.html +0 -12
  82. data/test/fixtures/unified_values/small_html/img_expected.json +0 -23
  83. data/test/fixtures/unified_values/small_html/nested_and_complex_wovn_ignore_actual.html +0 -10
  84. data/test/fixtures/unified_values/small_html/nested_and_complex_wovn_ignore_expected.json +0 -16
  85. data/test/fixtures/unified_values/small_html/nested_text_value_actual.html +0 -10
  86. data/test/fixtures/unified_values/small_html/nested_text_value_expected.json +0 -12
  87. data/test/fixtures/unified_values/small_html/nested_text_value_mixed_plan_text_actual.html +0 -10
  88. data/test/fixtures/unified_values/small_html/nested_text_value_mixed_plan_text_expected.json +0 -14
  89. data/test/fixtures/unified_values/small_html/option_tag_actual.html +0 -9
  90. data/test/fixtures/unified_values/small_html/option_tag_expected.json +0 -13
  91. data/test/fixtures/unified_values/small_html/text_different_inline_each_other_actual.html +0 -10
  92. data/test/fixtures/unified_values/small_html/text_different_inline_each_other_expected.json +0 -22
  93. data/test/fixtures/unified_values/small_html/text_in_svg_actual.html +0 -9
  94. data/test/fixtures/unified_values/small_html/text_in_svg_expected.json +0 -8
  95. data/test/fixtures/unified_values/small_html/text_with_html_entity_actual.html +0 -6
  96. data/test/fixtures/unified_values/small_html/text_with_html_entity_expected.json +0 -8
  97. data/test/fixtures/unified_values/small_html/unknown_or_custom_tag_actual.html +0 -12
  98. data/test/fixtures/unified_values/small_html/unknown_or_custom_tag_expected.json +0 -24
  99. data/test/fixtures/unified_values/small_html/unnecessay_top_end_tag_actual.html +0 -12
  100. data/test/fixtures/unified_values/small_html/unnecessay_top_end_tag_expected.json +0 -14
  101. data/test/fixtures/unified_values/small_html/wovn_ignore_actual.html +0 -10
  102. data/test/fixtures/unified_values/small_html/wovn_ignore_expected.json +0 -13
  103. data/test/lib/api_data_test.rb +0 -83
  104. data/test/lib/html_replacers/image_replacer_test.rb +0 -165
  105. data/test/lib/html_replacers/input_replacer_test.rb +0 -140
  106. data/test/lib/html_replacers/link_replacer_test.rb +0 -328
  107. data/test/lib/html_replacers/meta_replacer_test.rb +0 -157
  108. data/test/lib/html_replacers/replacer_base_test.rb +0 -128
  109. data/test/lib/html_replacers/script_replacer_test.rb +0 -139
  110. data/test/lib/html_replacers/text_replacer_test.rb +0 -99
  111. data/test/lib/html_replacers/unified_values/dst_swapping_targets_creator_test.rb +0 -137
  112. data/test/lib/html_replacers/unified_values/element_category_test.rb +0 -49
  113. data/test/lib/html_replacers/unified_values/node_swapping_targets_creator_test.rb +0 -137
  114. data/test/lib/html_replacers/unified_values/text_replacer_test.rb +0 -270
  115. data/test/lib/html_replacers/unified_values/text_scraper_test.rb +0 -121
  116. data/test/lib/html_replacers/unified_values/values_stack_test.rb +0 -122
  117. data/test/lib/services/url_test.rb +0 -9
  118. data/test/lib/services/value_agent_test.rb +0 -32
  119. data/test/services/url_test.rb +0 -163
  120. data/values/values +0 -1
@@ -1,12 +0,0 @@
1
- <html>
2
- <head></head>
3
- <body>
4
- <div>
5
- <span>
6
- text value1
7
- <p>text value2</p>
8
- text value3
9
- </span>
10
- </div>
11
- </body>
12
- </html>
@@ -1,22 +0,0 @@
1
- [
2
- {
3
- "xpath": "/html/body/div/text()",
4
- "srcs": [
5
- "<span>",
6
- "text value1"
7
- ]
8
- },
9
- {
10
- "xpath": "/html/body/div/span/p/text()",
11
- "srcs": [
12
- "text value2"
13
- ]
14
- },
15
- {
16
- "xpath": "/html/body/div/text()[2]",
17
- "srcs": [
18
- "text value3",
19
- "</span>"
20
- ]
21
- }
22
- ]
@@ -1,10 +0,0 @@
1
- <html>
2
- <head></head>
3
- <body>
4
- <div>
5
- <span>
6
- text value1<br>text value2
7
- </span>
8
- </div>
9
- </body>
10
- </html>
@@ -1,12 +0,0 @@
1
- [
2
- {
3
- "xpath": "/html/body/div/text()",
4
- "srcs": [
5
- "<span>",
6
- "text value1",
7
- "<br>",
8
- "text value2",
9
- "</span>"
10
- ]
11
- }
12
- ]
@@ -1,12 +0,0 @@
1
- <html>
2
- <head></head>
3
- <body>
4
- <div>
5
- <span>
6
- <!-- <span>comment1</span>-->
7
- text value
8
- <!-- <span>comment2</span>-->
9
- </span>
10
- </div>
11
- </body>
12
- </html>
@@ -1,10 +0,0 @@
1
- [
2
- {
3
- "xpath": "/html/body/div/text()",
4
- "srcs": [
5
- "<span>",
6
- "text value",
7
- "</span>"
8
- ]
9
- }
10
- ]
@@ -1,7 +0,0 @@
1
- <html>
2
- <head></head>
3
- <body>
4
- <p>&nbsp;</p>
5
- <p>a<span>This is &lt;a&gt; text value!</span></p>
6
- </body>
7
- </html>
@@ -1,11 +0,0 @@
1
- [
2
- {
3
- "xpath": "/html/body/p[2]/text()",
4
- "srcs": [
5
- "a",
6
- "<span>",
7
- "This is &lt;a&gt; text value!",
8
- "</span>"
9
- ]
10
- }
11
- ]
@@ -1,14 +0,0 @@
1
- <html>
2
- <head></head>
3
- <body>
4
- <div>
5
- <div>
6
- <div>
7
- <div>
8
- text value1
9
- </div>
10
- </div>
11
- </div>
12
- </div>
13
- </body>
14
- </html>
@@ -1,8 +0,0 @@
1
- [
2
- {
3
- "xpath": "/html/body/div/div/div/div/text()",
4
- "srcs": [
5
- "text value1"
6
- ]
7
- }
8
- ]
@@ -1,20 +0,0 @@
1
- <html>
2
- <head></head>
3
- <body>
4
- <div>
5
- text value1
6
- <span>
7
- text value2
8
- <span>
9
- text value3
10
- <span>
11
- text value4
12
- </span>
13
- text value5
14
- </span>
15
- text value6
16
- </span>
17
- text value7
18
- </div>
19
- </body>
20
- </html>
@@ -1,20 +0,0 @@
1
- [
2
- {
3
- "xpath": "/html/body/div/text()",
4
- "srcs": [
5
- "text value1",
6
- "<span>",
7
- "text value2",
8
- "<span>",
9
- "text value3",
10
- "<span>",
11
- "text value4",
12
- "</span>",
13
- "text value5",
14
- "</span>",
15
- "text value6",
16
- "</span>",
17
- "text value7"
18
- ]
19
- }
20
- ]
@@ -1,10 +0,0 @@
1
- <html>
2
- <head></head>
3
- <body>
4
- <div>
5
- <span>
6
- text value1<br>text value2
7
- </span>
8
- </div>
9
- </body>
10
- </html>
@@ -1,12 +0,0 @@
1
- [
2
- {
3
- "xpath": "/html/body/div/text()",
4
- "srcs": [
5
- "<span>",
6
- "text value1",
7
- "<br>",
8
- "text value2",
9
- "</span>"
10
- ]
11
- }
12
- ]
@@ -1,12 +0,0 @@
1
- <html>
2
- <head></head>
3
- <body>
4
- <div>
5
- <div>
6
- <span>
7
- <p></p>
8
- </span>
9
- </div>
10
- </div>
11
- </body>
12
- </html>
@@ -1,12 +0,0 @@
1
- <html>
2
- <head></head>
3
- <body>
4
- <div>
5
- <span>
6
- text value1
7
- <img src='sample.jpg'/>
8
- text value2
9
- </span>
10
- </div>
11
- </body>
12
- </html>
@@ -1,16 +0,0 @@
1
- [
2
- {
3
- "xpath": "/html/body/div/text()",
4
- "srcs": [
5
- "<span>",
6
- "text value1"
7
- ]
8
- },
9
- {
10
- "xpath": "/html/body/div/text()[2]",
11
- "srcs": [
12
- "text value2",
13
- "</span>"
14
- ]
15
- }
16
- ]
@@ -1,10 +0,0 @@
1
- <html>
2
- <head></head>
3
- <body>
4
- <div>
5
- <span>
6
- This is a <a class="ignore-me im-fine">complex</a> text value!
7
- </span>
8
- </div>
9
- </body>
10
- </html>
@@ -1,13 +0,0 @@
1
- [
2
- {
3
- "xpath": "/html/body/div/text()",
4
- "srcs": [
5
- "<span>",
6
- "This is a",
7
- "<a wovn-ignore>",
8
- "</a>",
9
- "text value!",
10
- "</span>"
11
- ]
12
- }
13
- ]
@@ -1,12 +0,0 @@
1
- <html>
2
- <head></head>
3
- <body>
4
- <div>
5
- <a class="home">
6
- <span><h1>text value1</h1>text value2
7
- <img src="/neocities.png">
8
- text value3</span>
9
- </a><br>
10
- </div>
11
- </body>
12
- </html>
@@ -1,23 +0,0 @@
1
- [
2
- {
3
- "xpath": "/html/body/div/a/span/h1/text()",
4
- "srcs": [
5
- "text value1"
6
- ]
7
- },
8
- {
9
- "xpath": "/html/body/div/text()[2]",
10
- "srcs": [
11
- "text value2"
12
- ]
13
- },
14
- {
15
- "xpath": "/html/body/div/text()[3]",
16
- "srcs": [
17
- "text value3",
18
- "</span>",
19
- "</a>",
20
- "<br>"
21
- ]
22
- }
23
- ]
@@ -1,10 +0,0 @@
1
- <html>
2
- <head></head>
3
- <body>
4
- <div>
5
- <span>
6
- This is a <b>complex <span wovn-ignore>ai<a>ue</a>o</span></b> text value!
7
- </span>
8
- </div>
9
- </body>
10
- </html>
@@ -1,16 +0,0 @@
1
- [
2
- {
3
- "xpath": "/html/body/div/text()",
4
- "srcs": [
5
- "<span>",
6
- "This is a",
7
- "<b>",
8
- "complex",
9
- "<span wovn-ignore>",
10
- "</span>",
11
- "</b>",
12
- "text value!",
13
- "</span>"
14
- ]
15
- }
16
- ]
@@ -1,10 +0,0 @@
1
- <html>
2
- <head></head>
3
- <body>
4
- <div>
5
- <span>
6
- <b>text value!</b>
7
- </span>
8
- </div>
9
- </body>
10
- </html>
@@ -1,12 +0,0 @@
1
- [
2
- {
3
- "xpath": "/html/body/div/text()",
4
- "srcs": [
5
- "<span>",
6
- "<b>",
7
- "text value!",
8
- "</b>",
9
- "</span>"
10
- ]
11
- }
12
- ]
@@ -1,10 +0,0 @@
1
- <html>
2
- <head></head>
3
- <body>
4
- <div>
5
- <span>
6
- This is a <b>complex</b> text value!
7
- </span>
8
- </div>
9
- </body>
10
- </html>
@@ -1,14 +0,0 @@
1
- [
2
- {
3
- "xpath": "/html/body/div/text()",
4
- "srcs": [
5
- "<span>",
6
- "This is a",
7
- "<b>",
8
- "complex",
9
- "</b>",
10
- "text value!",
11
- "</span>"
12
- ]
13
- }
14
- ]
@@ -1,9 +0,0 @@
1
- <html>
2
- <head></head>
3
- <body>
4
- <select>
5
- <option value="a">text value1</option>
6
- <option value="b">text value2</option>
7
- </select>
8
- </body>
9
- </html>
@@ -1,13 +0,0 @@
1
- [
2
- {
3
- "xpath": "/html/body/select/text()",
4
- "srcs": [
5
- "<option>",
6
- "text value1",
7
- "</option>",
8
- "<option>",
9
- "text value2",
10
- "</option>"
11
- ]
12
- }
13
- ]
@@ -1,10 +0,0 @@
1
- <html>
2
- <head></head>
3
- <body>
4
- <div>
5
- <span><a href='#'>text value1</a></span>
6
- <p></p>
7
- <strong><em>text value1</em></strong>
8
- </div>
9
- </body>
10
- </html>