wovnrb 1.1.0 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (120) hide show
  1. checksums.yaml +4 -4
  2. data/.circleci/config.yml +1 -0
  3. data/.gitignore +2 -0
  4. data/.rubocop.yml +1 -0
  5. data/.rubocop_todo.yml +308 -0
  6. data/Rakefile +13 -14
  7. data/lib/wovnrb.rb +43 -98
  8. data/lib/wovnrb/api_translator.rb +143 -0
  9. data/lib/wovnrb/headers.rb +78 -92
  10. data/lib/wovnrb/helpers/nokogumbo_helper.rb +1 -1
  11. data/lib/wovnrb/lang.rb +93 -125
  12. data/lib/wovnrb/railtie.rb +5 -7
  13. data/lib/wovnrb/services/glob.rb +3 -3
  14. data/lib/wovnrb/services/html_converter.rb +192 -0
  15. data/lib/wovnrb/services/html_replace_marker.rb +38 -0
  16. data/lib/wovnrb/services/wovn_logger.rb +8 -4
  17. data/lib/wovnrb/settings.rb +5 -3
  18. data/lib/wovnrb/store.rb +35 -26
  19. data/lib/wovnrb/text_caches/cache_base.rb +3 -2
  20. data/lib/wovnrb/text_caches/memory_cache.rb +2 -2
  21. data/lib/wovnrb/version.rb +1 -1
  22. data/test/fixtures/html/test.html +8 -0
  23. data/test/fixtures/html/test_translated.html +8 -0
  24. data/test/lib/api_translator_test.rb +109 -0
  25. data/test/lib/headers_test.rb +84 -55
  26. data/test/lib/lang_test.rb +157 -357
  27. data/test/lib/services/glob_test.rb +1 -1
  28. data/test/lib/services/html_converter_test.rb +166 -0
  29. data/test/lib/services/html_replace_marker_test.rb +75 -0
  30. data/test/lib/services/wovn_logger_test.rb +6 -6
  31. data/test/lib/store_test.rb +25 -69
  32. data/test/lib/text_caches/cache_base_test.rb +1 -1
  33. data/test/lib/text_caches/memory_cache_test.rb +10 -11
  34. data/test/lib/wovnrb_test.rb +77 -310
  35. data/test/test_helper.rb +22 -32
  36. data/wovnrb.gemspec +35 -44
  37. metadata +86 -205
  38. data/ext/dom/Makefile +0 -239
  39. data/lib/wovnrb/api_data.rb +0 -59
  40. data/lib/wovnrb/html_replacers/image_replacer.rb +0 -69
  41. data/lib/wovnrb/html_replacers/input_replacer.rb +0 -38
  42. data/lib/wovnrb/html_replacers/link_replacer.rb +0 -78
  43. data/lib/wovnrb/html_replacers/meta_replacer.rb +0 -28
  44. data/lib/wovnrb/html_replacers/replacer_base.rb +0 -49
  45. data/lib/wovnrb/html_replacers/script_replacer.rb +0 -39
  46. data/lib/wovnrb/html_replacers/text_replacer.rb +0 -21
  47. data/lib/wovnrb/html_replacers/unified_values/dst_swapping_targets_creator.rb +0 -76
  48. data/lib/wovnrb/html_replacers/unified_values/element_category.rb +0 -242
  49. data/lib/wovnrb/html_replacers/unified_values/node_swapping_targets_creator.rb +0 -134
  50. data/lib/wovnrb/html_replacers/unified_values/text_replacer.rb +0 -35
  51. data/lib/wovnrb/html_replacers/unified_values/text_scraper.rb +0 -152
  52. data/lib/wovnrb/html_replacers/unified_values/values_stack.rb +0 -65
  53. data/lib/wovnrb/services/url.rb +0 -12
  54. data/lib/wovnrb/services/value_agent.rb +0 -9
  55. data/test/fixtures/unified_values/site_html/simple_actual.html +0 -96
  56. data/test/fixtures/unified_values/site_html/simple_expected.json +0 -251
  57. data/test/fixtures/unified_values/site_html/wovn.io_actual.html +0 -686
  58. data/test/fixtures/unified_values/site_html/wovn.io_expected.json +0 -543
  59. data/test/fixtures/unified_values/site_html/www.yahoo.co.jp_actual.html +0 -1024
  60. data/test/fixtures/unified_values/site_html/www.yahoo.co.jp_expected.json +0 -3345
  61. data/test/fixtures/unified_values/small_html/block_inside_inline_actual.html +0 -12
  62. data/test/fixtures/unified_values/small_html/block_inside_inline_expected.json +0 -22
  63. data/test/fixtures/unified_values/small_html/br_tag_actual.html +0 -10
  64. data/test/fixtures/unified_values/small_html/br_tag_expected.json +0 -12
  65. data/test/fixtures/unified_values/small_html/comment_tag_actual.html +0 -12
  66. data/test/fixtures/unified_values/small_html/comment_tag_expected.json +0 -10
  67. data/test/fixtures/unified_values/small_html/complex_text_with_html_entity_actual.html +0 -7
  68. data/test/fixtures/unified_values/small_html/complex_text_with_html_entity_expected.json +0 -11
  69. data/test/fixtures/unified_values/small_html/deep_nested_block_actual.html +0 -14
  70. data/test/fixtures/unified_values/small_html/deep_nested_block_expected.json +0 -8
  71. data/test/fixtures/unified_values/small_html/deep_nested_inline_actual.html +0 -20
  72. data/test/fixtures/unified_values/small_html/deep_nested_inline_expected.json +0 -20
  73. data/test/fixtures/unified_values/small_html/empty_tag_actual.html +0 -10
  74. data/test/fixtures/unified_values/small_html/empty_tag_expected.json +0 -12
  75. data/test/fixtures/unified_values/small_html/empty_text_actual.html +0 -12
  76. data/test/fixtures/unified_values/small_html/empty_text_expected.json +0 -1
  77. data/test/fixtures/unified_values/small_html/ignore_tag_actual.html +0 -12
  78. data/test/fixtures/unified_values/small_html/ignore_tag_expected.json +0 -16
  79. data/test/fixtures/unified_values/small_html/ignored_class_actual.html +0 -10
  80. data/test/fixtures/unified_values/small_html/ignored_class_expected.json +0 -13
  81. data/test/fixtures/unified_values/small_html/img_actual.html +0 -12
  82. data/test/fixtures/unified_values/small_html/img_expected.json +0 -23
  83. data/test/fixtures/unified_values/small_html/nested_and_complex_wovn_ignore_actual.html +0 -10
  84. data/test/fixtures/unified_values/small_html/nested_and_complex_wovn_ignore_expected.json +0 -16
  85. data/test/fixtures/unified_values/small_html/nested_text_value_actual.html +0 -10
  86. data/test/fixtures/unified_values/small_html/nested_text_value_expected.json +0 -12
  87. data/test/fixtures/unified_values/small_html/nested_text_value_mixed_plan_text_actual.html +0 -10
  88. data/test/fixtures/unified_values/small_html/nested_text_value_mixed_plan_text_expected.json +0 -14
  89. data/test/fixtures/unified_values/small_html/option_tag_actual.html +0 -9
  90. data/test/fixtures/unified_values/small_html/option_tag_expected.json +0 -13
  91. data/test/fixtures/unified_values/small_html/text_different_inline_each_other_actual.html +0 -10
  92. data/test/fixtures/unified_values/small_html/text_different_inline_each_other_expected.json +0 -22
  93. data/test/fixtures/unified_values/small_html/text_in_svg_actual.html +0 -9
  94. data/test/fixtures/unified_values/small_html/text_in_svg_expected.json +0 -8
  95. data/test/fixtures/unified_values/small_html/text_with_html_entity_actual.html +0 -6
  96. data/test/fixtures/unified_values/small_html/text_with_html_entity_expected.json +0 -8
  97. data/test/fixtures/unified_values/small_html/unknown_or_custom_tag_actual.html +0 -12
  98. data/test/fixtures/unified_values/small_html/unknown_or_custom_tag_expected.json +0 -24
  99. data/test/fixtures/unified_values/small_html/unnecessay_top_end_tag_actual.html +0 -12
  100. data/test/fixtures/unified_values/small_html/unnecessay_top_end_tag_expected.json +0 -14
  101. data/test/fixtures/unified_values/small_html/wovn_ignore_actual.html +0 -10
  102. data/test/fixtures/unified_values/small_html/wovn_ignore_expected.json +0 -13
  103. data/test/lib/api_data_test.rb +0 -83
  104. data/test/lib/html_replacers/image_replacer_test.rb +0 -165
  105. data/test/lib/html_replacers/input_replacer_test.rb +0 -140
  106. data/test/lib/html_replacers/link_replacer_test.rb +0 -328
  107. data/test/lib/html_replacers/meta_replacer_test.rb +0 -157
  108. data/test/lib/html_replacers/replacer_base_test.rb +0 -128
  109. data/test/lib/html_replacers/script_replacer_test.rb +0 -139
  110. data/test/lib/html_replacers/text_replacer_test.rb +0 -99
  111. data/test/lib/html_replacers/unified_values/dst_swapping_targets_creator_test.rb +0 -137
  112. data/test/lib/html_replacers/unified_values/element_category_test.rb +0 -49
  113. data/test/lib/html_replacers/unified_values/node_swapping_targets_creator_test.rb +0 -137
  114. data/test/lib/html_replacers/unified_values/text_replacer_test.rb +0 -270
  115. data/test/lib/html_replacers/unified_values/text_scraper_test.rb +0 -121
  116. data/test/lib/html_replacers/unified_values/values_stack_test.rb +0 -122
  117. data/test/lib/services/url_test.rb +0 -9
  118. data/test/lib/services/value_agent_test.rb +0 -32
  119. data/test/services/url_test.rb +0 -163
  120. data/values/values +0 -1
data/ext/dom/Makefile DELETED
@@ -1,239 +0,0 @@
1
-
2
- SHELL = /bin/sh
3
-
4
- # V=0 quiet, V=1 verbose. other values don't work.
5
- V = 0
6
- Q1 = $(V:1=)
7
- Q = $(Q1:0=@)
8
- ECHO1 = $(V:1=@:)
9
- ECHO = $(ECHO1:0=@echo)
10
-
11
- #### Start of system configuration section. ####
12
-
13
- srcdir = .
14
- topdir = /Users/hal0884/.rbenv/versions/2.1.0/include/ruby-2.1.0
15
- hdrdir = $(topdir)
16
- arch_hdrdir = /Users/hal0884/.rbenv/versions/2.1.0/include/ruby-2.1.0/x86_64-darwin14.0
17
- PATH_SEPARATOR = :
18
- VPATH = $(srcdir):$(arch_hdrdir)/ruby:$(hdrdir)/ruby
19
- prefix = $(DESTDIR)/Users/hal0884/.rbenv/versions/2.1.0
20
- rubysitearchprefix = $(rubylibprefix)/$(sitearch)
21
- rubyarchprefix = $(rubylibprefix)/$(arch)
22
- rubylibprefix = $(libdir)/$(RUBY_BASE_NAME)
23
- exec_prefix = $(prefix)
24
- vendorarchhdrdir = $(vendorhdrdir)/$(sitearch)
25
- sitearchhdrdir = $(sitehdrdir)/$(sitearch)
26
- rubyarchhdrdir = $(rubyhdrdir)/$(arch)
27
- vendorhdrdir = $(rubyhdrdir)/vendor_ruby
28
- sitehdrdir = $(rubyhdrdir)/site_ruby
29
- rubyhdrdir = $(includedir)/$(RUBY_VERSION_NAME)
30
- vendorarchdir = $(vendorlibdir)/$(sitearch)
31
- vendorlibdir = $(vendordir)/$(ruby_version)
32
- vendordir = $(rubylibprefix)/vendor_ruby
33
- sitearchdir = $(sitelibdir)/$(sitearch)
34
- sitelibdir = $(sitedir)/$(ruby_version)
35
- sitedir = $(rubylibprefix)/site_ruby
36
- rubyarchdir = $(rubylibdir)/$(arch)
37
- rubylibdir = $(rubylibprefix)/$(ruby_version)
38
- sitearchincludedir = $(includedir)/$(sitearch)
39
- archincludedir = $(includedir)/$(arch)
40
- sitearchlibdir = $(libdir)/$(sitearch)
41
- archlibdir = $(libdir)/$(arch)
42
- ridir = $(datarootdir)/$(RI_BASE_NAME)
43
- mandir = $(datarootdir)/man
44
- localedir = $(datarootdir)/locale
45
- libdir = $(exec_prefix)/lib
46
- psdir = $(docdir)
47
- pdfdir = $(docdir)
48
- dvidir = $(docdir)
49
- htmldir = $(docdir)
50
- infodir = $(datarootdir)/info
51
- docdir = $(datarootdir)/doc/$(PACKAGE)
52
- oldincludedir = $(DESTDIR)/usr/include
53
- includedir = $(prefix)/include
54
- localstatedir = $(prefix)/var
55
- sharedstatedir = $(prefix)/com
56
- sysconfdir = $(prefix)/etc
57
- datadir = $(datarootdir)
58
- datarootdir = $(prefix)/share
59
- libexecdir = $(exec_prefix)/libexec
60
- sbindir = $(exec_prefix)/sbin
61
- bindir = $(exec_prefix)/bin
62
- archdir = $(rubyarchdir)
63
-
64
-
65
- CC = gcc-4.2
66
- CXX = g++
67
- LIBRUBY = $(LIBRUBY_SO)
68
- LIBRUBY_A = lib$(RUBY_SO_NAME)-static.a
69
- LIBRUBYARG_SHARED = -l$(RUBY_SO_NAME)
70
- LIBRUBYARG_STATIC = -l$(RUBY_SO_NAME)-static -framework CoreFoundation
71
- empty =
72
- OUTFLAG = -o $(empty)
73
- COUTFLAG = -o $(empty)
74
-
75
- RUBY_EXTCONF_H =
76
- cflags = $(optflags) $(debugflags) $(warnflags)
77
- optflags = -O3 -fno-fast-math
78
- debugflags = -ggdb3
79
- warnflags = -Wall -Wextra -Wno-unused-parameter -Wno-parentheses -Wno-long-long -Wno-missing-field-initializers -Wunused-variable -Wpointer-arith -Wwrite-strings -Wdeclaration-after-statement -Wshorten-64-to-32 -Wimplicit-function-declaration -Wdivision-by-zero -Wextra-tokens
80
- CCDLFLAGS = -fno-common
81
- CFLAGS = $(CCDLFLAGS) -O3 -Wno-error=shorten-64-to-32 -fno-common -pipe $(ARCH_FLAG)
82
- INCFLAGS = -I. -I$(arch_hdrdir) -I$(hdrdir)/ruby/backward -I$(hdrdir) -I$(srcdir)
83
- DEFS =
84
- CPPFLAGS = -I/Users/hal0884/.rbenv/versions/2.1.0/include -D_XOPEN_SOURCE -D_DARWIN_C_SOURCE -D_DARWIN_UNLIMITED_SELECT -D_REENTRANT $(DEFS) $(cppflags)
85
- CXXFLAGS = $(CCDLFLAGS) $(cxxflags) $(ARCH_FLAG)
86
- ldflags = -L. -L/Users/hal0884/.rbenv/versions/2.1.0/lib -fstack-protector
87
- dldflags = -Wl,-undefined,dynamic_lookup -Wl,-multiply_defined,suppress
88
- ARCH_FLAG =
89
- DLDFLAGS = $(ldflags) $(dldflags) $(ARCH_FLAG)
90
- LDSHARED = $(CC) -dynamic -bundle
91
- LDSHAREDXX = $(CXX) -dynamic -bundle
92
- AR = ar
93
- EXEEXT =
94
-
95
- RUBY_INSTALL_NAME = ruby
96
- RUBY_SO_NAME = ruby.2.1.0
97
- RUBYW_INSTALL_NAME =
98
- RUBY_VERSION_NAME = $(RUBY_BASE_NAME)-$(ruby_version)
99
- RUBYW_BASE_NAME = rubyw
100
- RUBY_BASE_NAME = ruby
101
-
102
- arch = x86_64-darwin14.0
103
- sitearch = $(arch)
104
- ruby_version = 2.1.0
105
- ruby = $(bindir)/ruby
106
- RUBY = $(ruby)
107
- ruby_headers = $(hdrdir)/ruby.h $(hdrdir)/ruby/ruby.h $(hdrdir)/ruby/defines.h $(hdrdir)/ruby/missing.h $(hdrdir)/ruby/intern.h $(hdrdir)/ruby/st.h $(hdrdir)/ruby/subst.h $(arch_hdrdir)/ruby/config.h
108
-
109
- RM = rm -f
110
- RM_RF = $(RUBY) -run -e rm -- -rf
111
- RMDIRS = rmdir -p
112
- MAKEDIRS = mkdir -p
113
- INSTALL = /usr/bin/install -c
114
- INSTALL_PROG = $(INSTALL) -m 0755
115
- INSTALL_DATA = $(INSTALL) -m 644
116
- COPY = cp
117
- TOUCH = exit >
118
-
119
- #### End of system configuration section. ####
120
-
121
- preload =
122
-
123
- libpath = . $(libdir)
124
- LIBPATH = -L. -L$(libdir)
125
- DEFFILE =
126
-
127
- CLEANFILES = mkmf.log
128
- DISTCLEANFILES =
129
- DISTCLEANDIRS =
130
-
131
- extout =
132
- extout_prefix =
133
- target_prefix = /dom
134
- LOCAL_LIBS =
135
- LIBS = $(LIBRUBYARG_SHARED) -lpthread -ldl -lobjc
136
- ORIG_SRCS = dom_wrap.cxx dom.cpp
137
- SRCS = $(ORIG_SRCS)
138
- OBJS = dom_wrap.o dom.o
139
- HDRS = $(srcdir)/dom.h
140
- TARGET = dom
141
- TARGET_NAME = dom
142
- TARGET_ENTRY = Init_$(TARGET_NAME)
143
- DLLIB = $(TARGET).bundle
144
- EXTSTATIC =
145
- STATIC_LIB =
146
-
147
- TIMESTAMP_DIR = .
148
- BINDIR = $(bindir)
149
- RUBYCOMMONDIR = $(sitedir)$(target_prefix)
150
- RUBYLIBDIR = $(sitelibdir)$(target_prefix)
151
- RUBYARCHDIR = $(sitearchdir)$(target_prefix)
152
- HDRDIR = $(rubyhdrdir)/ruby$(target_prefix)
153
- ARCHHDRDIR = $(rubyhdrdir)/$(arch)/ruby$(target_prefix)
154
-
155
- TARGET_SO = $(DLLIB)
156
- CLEANLIBS = $(TARGET).bundle
157
- CLEANOBJS = *.o *.bak
158
-
159
- all: $(DLLIB)
160
- static: $(STATIC_LIB)
161
- .PHONY: all install static install-so install-rb
162
- .PHONY: clean clean-so clean-static clean-rb
163
-
164
- clean-static::
165
- clean-rb-default::
166
- clean-rb::
167
- clean-so::
168
- clean: clean-so clean-static clean-rb-default clean-rb
169
- -$(Q)$(RM) $(CLEANLIBS) $(CLEANOBJS) $(CLEANFILES) .*.time
170
-
171
- distclean-rb-default::
172
- distclean-rb::
173
- distclean-so::
174
- distclean-static::
175
- distclean: clean distclean-so distclean-static distclean-rb-default distclean-rb
176
- -$(Q)$(RM) Makefile $(RUBY_EXTCONF_H) conftest.* mkmf.log
177
- -$(Q)$(RM) core ruby$(EXEEXT) *~ $(DISTCLEANFILES)
178
- -$(Q)$(RMDIRS) $(DISTCLEANDIRS) 2> /dev/null || true
179
-
180
- realclean: distclean
181
- install: install-so install-rb
182
-
183
- install-so: $(DLLIB) $(TIMESTAMP_DIR)/.RUBYARCHDIR.-.dom.time
184
- $(INSTALL_PROG) $(DLLIB) $(RUBYARCHDIR)
185
- clean-static::
186
- -$(Q)$(RM) $(STATIC_LIB)
187
- install-rb: pre-install-rb install-rb-default
188
- install-rb-default: pre-install-rb-default
189
- pre-install-rb: Makefile
190
- pre-install-rb-default: Makefile
191
- pre-install-rb-default:
192
- $(ECHO) installing default dom libraries
193
- $(TIMESTAMP_DIR)/.RUBYARCHDIR.-.dom.time:
194
- $(Q) $(MAKEDIRS) $(@D) $(RUBYARCHDIR)
195
- $(Q) $(TOUCH) $@
196
-
197
- site-install: site-install-so site-install-rb
198
- site-install-so: install-so
199
- site-install-rb: install-rb
200
-
201
- .SUFFIXES: .c .m .cc .mm .cxx .cpp .C .o
202
-
203
- .cc.o:
204
- $(ECHO) compiling $(<)
205
- $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $<
206
-
207
- .mm.o:
208
- $(ECHO) compiling $(<)
209
- $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $<
210
-
211
- .cxx.o:
212
- $(ECHO) compiling $(<)
213
- $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $<
214
-
215
- .cpp.o:
216
- $(ECHO) compiling $(<)
217
- $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $<
218
-
219
- .C.o:
220
- $(ECHO) compiling $(<)
221
- $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $<
222
-
223
- .c.o:
224
- $(ECHO) compiling $(<)
225
- $(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -c $<
226
-
227
- .m.o:
228
- $(ECHO) compiling $(<)
229
- $(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -c $<
230
-
231
- $(DLLIB): $(OBJS) Makefile
232
- $(ECHO) linking shared-object dom/$(DLLIB)
233
- -$(Q)$(RM) $(@)
234
- $(Q) $(LDSHAREDXX) -o $@ $(OBJS) $(LIBPATH) $(DLDFLAGS) $(LOCAL_LIBS) $(LIBS)
235
- $(Q) $(POSTLINK)
236
-
237
-
238
-
239
- $(OBJS): $(HDRS) $(ruby_headers)
data/lib/wovnrb/api_data.rb DELETED
@@ -1,59 +0,0 @@
1
- module Wovnrb
2
- class ApiData
3
- def initialize(access_url, store)
4
- @access_url = access_url.gsub(/\/$/, '')
5
- @store = store
6
- end
7
-
8
- def get_data
9
- cache_key = to_key(@access_url)
10
- data = get_data_value(cache_key)
11
- JSON.parse(data)
12
- end
13
-
14
- private
15
- def get_data_value(cache_key)
16
- cache_value = CacheBase.get_single.get(cache_key)
17
- return cache_value if cache_value
18
-
19
- uri = build_api_uri
20
- begin
21
- response = get_from_api_server(uri)
22
- rescue => e
23
- response = '{}'
24
- WovnLogger.instance.error("API server GET request failed :\nurl: #{uri}\n#{e.message}")
25
- end
26
-
27
- # Always cache response, even when error returns to avoid DDOS
28
- CacheBase.get_single.put(cache_key, response)
29
- response
30
- end
31
-
32
- @@cache_prefix = 'api::cache::'
33
- def to_key(url)
34
- "::" + @store.settings['project_token'] + "::#{@@cache_prefix}#{url}"
35
- end
36
-
37
- def build_api_uri
38
- t = CGI::escape(@store.settings['project_token'])
39
- u = CGI::escape(@access_url)
40
- URI.parse("#{@store.settings['api_url']}?token=#{t}&url=#{u}")
41
- end
42
-
43
- def get_from_api_server(uri)
44
- http = Net::HTTP.new(uri.host, uri.port)
45
- http.use_ssl = true if uri.scheme == 'https'
46
- http.open_timeout = @store.settings['api_timeout_seconds']
47
- http.read_timeout= @store.settings['api_timeout_seconds']
48
- response = http.start {
49
- http.get(uri.request_uri)
50
- }
51
-
52
- if response.code == '200'
53
- response.body
54
- else
55
- raise "Response Code is not success: #{response.code}"
56
- end
57
- end
58
- end
59
- end
data/lib/wovnrb/html_replacers/image_replacer.rb DELETED
@@ -1,69 +0,0 @@
1
- module Wovnrb
2
- class ImageReplacer < ReplacerBase
3
- def initialize(store, url, text_index, src_index, img_src_prefix, host_aliases)
4
- super(store)
5
- @url = url
6
- @text_index = text_index
7
- @src_index = src_index
8
- @img_src_prefix = img_src_prefix
9
- @host_aliases = host_aliases
10
- end
11
-
12
- def replace(dom, lang)
13
- dom.xpath('.//img').each do |node|
14
- next if wovn_ignore?(node)
15
-
16
- # use regular expressions to support case insensitivity (right?)
17
- if node.to_html =~ /src=['"][^'"]*['"]/i
18
- src = node.to_html.match(/src=['"]([^'"]*)['"]/i)[1]
19
- # THIS SRC CORRECTION DOES NOT HANDLE ONE IMPORTANT CASE
20
- # 1) "../path/with/ellipse"
21
- # if this is not an absolute src
22
- if src !~ /:\/\//
23
- # if this is a path with a leading slash
24
- if src =~ /^\//
25
- src = join_path("#{@url[:protocol]}://#{@url[:host]}", src)
26
- else
27
- src = join_path("#{@url[:protocol]}://#{@url[:host]}#{@url[:path]}", src)
28
- end
29
- end
30
-
31
- unless replace_src_if_match(node, lang, src)
32
- # host name exclude port number
33
- host_match = %r!://([^/:]+)!.match(src)
34
- host_name = host_match ? host_match[1] : ''
35
-
36
- # replace image if match host alias
37
- if host_match and @host_aliases.include?(host_name)
38
- @host_aliases.find do |host_alias|
39
- src_alias = src.gsub(host_name, host_alias)
40
- replace_src_if_match(node, lang, src_alias)
41
- end
42
- end
43
- end
44
- end
45
-
46
- if node.get_attribute('alt')
47
- alt = node.get_attribute('alt').strip
48
- if @text_index[alt] && @text_index[alt][lang.lang_code] && @text_index[alt][lang.lang_code].size > 0
49
- add_comment_node(node, alt)
50
- node.attribute('alt').value = replace_text(alt, @text_index[alt][lang.lang_code][0]['data'])
51
- end
52
- end
53
- end
54
- end
55
-
56
- private
57
- def replace_src_if_match(node, lang, src)
58
- # shouldn't need size check, but for now...
59
- if @src_index[src] && @src_index[src][lang.lang_code] && @src_index[src][lang.lang_code].size > 0
60
- node.attribute('src').value = "#{@img_src_prefix}#{@src_index[src][lang.lang_code][0]['data']}"
61
- end
62
- end
63
-
64
- def join_path(x, y)
65
- separator = (x[-1] != '/' and y[0] != '/') ? '/' : ''
66
- "#{x}#{separator}#{y}"
67
- end
68
- end
69
- end
data/lib/wovnrb/html_replacers/input_replacer.rb DELETED
@@ -1,38 +0,0 @@
1
- module Wovnrb
2
- class InputReplacer < ReplacerBase
3
- def initialize(store, text_index)
4
- super(store)
5
- @text_index = text_index
6
- end
7
-
8
- def replace(dom, lang)
9
- dom.xpath('.//input').each do |node|
10
- next if wovn_ignore?(node)
11
-
12
- set_attribute('value', node, lang) if replaceable_value? node
13
- set_attribute('placeholder', node, lang) if replaceable_placeholder? node
14
- end
15
- end
16
-
17
- private
18
-
19
- def set_attribute(name, node, lang)
20
- node_value = node.get_attribute(name).strip
21
- if @text_index[node_value] && @text_index[node_value][lang.lang_code] && @text_index[node_value][lang.lang_code].size > 0
22
- node.set_attribute(name, replace_text(node_value, @text_index[node_value][lang.lang_code][0]['data']))
23
- end
24
- end
25
-
26
- def replaceable_value?(node)
27
- return false unless ['submit', 'reset'].include? node.get_attribute('type')
28
-
29
- attribute_value = node.get_attribute('value')
30
- attribute_value && !attribute_value.empty?
31
- end
32
-
33
- def replaceable_placeholder?(node)
34
- attribute_placeholder = node.get_attribute('placeholder')
35
- attribute_placeholder && !attribute_placeholder.empty?
36
- end
37
- end
38
- end
data/lib/wovnrb/html_replacers/link_replacer.rb DELETED
@@ -1,78 +0,0 @@
1
- module Wovnrb
2
- class LinkReplacer < ReplacerBase
3
- module FileExtension
4
- IMG_FILES = '(?!jp$)jpe?g?|bmp|gif|png|btif|tiff?|psd|djvu?|xif|wbmp|webp|p(n|b|g|p)m|rgb|tga|x(b|p)m|xwd|pic|ico|fh(c|4|5|7)?|xif|f(bs|px|st)'
5
- AUDIO_FILES = 'mp(3|2)|m(p?2|3|p?4|pg)a|midi?|kar|rmi|web(m|a)|aif(f?|c)|w(ma|av|ax)|m(ka|3u)|sil|s3m|og(a|g)|uvv?a'
6
- VIDEO_FILES = 'm(x|4)u|fl(i|v)|3g(p|2)|jp(gv|g?m)|mp(4v?|g4|(?!$)e?g?)|m(1|2)v|ogv|m(ov|ng)|qt|uvv?(h|m|p|s|v)|dvb|mk(v|3d|s)|f4v|as(x|f)|w(m(v|x)|vx)|xvid'
7
- DOC_FILES = 'zip|tar|ez|aw|atom(cat|svc)?|(cc)?xa?ml|cdmi(a|c|d|o|q)?|epub|g(ml|px|xf)|jar|js|ser|class|json(ml)?|do(c|t)m?|xps|pp(a|tx?|s)m?|potm?|sldm|mp(p|t)|bin|dms|lrf|mar|so|dist|distz|m?pkg|bpk|dump|rtf|tfi|pdf|pgp|apk|o(t|d)(b|c|ft?|g|h|i|p|s|t)'
8
- end
9
-
10
- def initialize(store, pattern, headers)
11
- super(store)
12
- @pattern = pattern
13
- @headers = headers
14
- end
15
-
16
-
17
- def replace(dom, lang)
18
- base_url = base_href(dom)
19
-
20
- dom.xpath('//*[match(.)]', MultiTagMatcher.new).each do |node|
21
- next if wovn_ignore?(node)
22
-
23
- href = node.get_attribute('href')
24
- next if href =~ /^\s*\{\{.+\}\}\s*$/
25
- next if href =~ /^\s*javascript:/i
26
- next if is_file?(href)
27
-
28
- new_href = href
29
- new_href = adjust_link_by_base(new_href, base_url) if base_url
30
- new_href = lang.add_lang_code(new_href, @pattern, @headers)
31
-
32
- node.set_attribute('href', new_href)
33
- end
34
- end
35
-
36
- private
37
-
38
- def adjust_link_by_base(href, base_url)
39
- return href if href =~ /^\// # absolute path
40
- return href if href =~ /^http(s?):\/\// # full url
41
-
42
- File.join(base_url, href)
43
- end
44
-
45
- def is_file?(href)
46
- img_files = /^(https?:\/\/)?.*(\.(#{FileExtension::IMG_FILES}))((\?|#).*)?$/i
47
- audio_files = /^(https?:\/\/)?.*(\.(#{FileExtension::AUDIO_FILES}))((\?|#).*)?$/i
48
- video_files = /^(https?:\/\/)?.*(\.(#{FileExtension::VIDEO_FILES}))((\?|#).*)?$/i
49
- doc_files = /^(https?:\/\/)?.*(\.(#{FileExtension::DOC_FILES}))((\?|#).*)?$/i
50
- href =~ img_files || href =~ audio_files || href =~ video_files || href =~ doc_files
51
- end
52
-
53
- def base_href(dom)
54
- base_tag = dom.xpath('//base').first
55
- return nil unless base_tag
56
-
57
- href = base_tag.get_attribute('href')
58
- return href if href =~ /^\// # absolute path
59
- return href if href =~ /^http(s?):\/\// # full url
60
-
61
- Addressable::URI.join('/', @headers.dirname, href).to_s
62
- end
63
- end
64
-
65
- class MultiTagMatcher
66
- def match(node_set)
67
- node_set.find_all { |node| a_tag?(node) || link_tag_with_canonical?(node) }
68
- end
69
-
70
- def a_tag?(node)
71
- node.name == 'a'
72
- end
73
-
74
- def link_tag_with_canonical?(node)
75
- node.name == 'link' && node.get_attribute('rel') == 'canonical'
76
- end
77
- end
78
- end