wovnrb 3.5.0 → 3.7.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +1 -0
  3. data/{makefile → Makefile} +0 -0
  4. data/README.en.md +30 -13
  5. data/build.sh +7 -0
  6. data/docker/nginx/Dockerfile +18 -0
  7. data/docker/nginx/README.md +13 -0
  8. data/docker/nginx/build.sh +8 -0
  9. data/docker/nginx/scripts/configure_sshd.sh +25 -0
  10. data/docker/nginx/scripts/startup.sh +10 -0
  11. data/docker/nginx/wovnrb.conf +19 -0
  12. data/docker/rails/Dockerfile +11 -2
  13. data/docker/rails/Dockerfile.ECS +17 -0
  14. data/docker/rails/TestSite/Gemfile +1 -3
  15. data/docker/rails/TestSite/app/controllers/custom_response_controller.rb +1 -1
  16. data/docker/rails/TestSite/config/environments/development.rb +2 -0
  17. data/docker/rails/TestSite/config/environments/production.rb +2 -0
  18. data/docker/rails/TestSite/config/environments/test.rb +2 -0
  19. data/docker/rails/TestSite/package.json +3 -3
  20. data/docker/rails/TestSite/public/index.html +1 -1
  21. data/docker/rails/TestSite/start.sh +2 -11
  22. data/docker/rails/TestSite/start_rails.sh +9 -0
  23. data/docker/rails/TestSite/yarn.lock +1150 -1466
  24. data/docker/scripts/jenkins/build.sh +45 -0
  25. data/docker/scripts/jenkins/tag_and_push_image.sh +30 -0
  26. data/docker/scripts/jenkins/taskdef.json +104 -0
  27. data/docker/scripts/jenkins/taskdef.json.bak +99 -0
  28. data/lib/wovnrb/api_translator.rb +6 -1
  29. data/lib/wovnrb/headers.rb +40 -66
  30. data/lib/wovnrb/services/html_converter.rb +17 -1
  31. data/lib/wovnrb/services/url.rb +136 -0
  32. data/lib/wovnrb/store.rb +9 -3
  33. data/lib/wovnrb/url_language_switcher.rb +148 -0
  34. data/lib/wovnrb/version.rb +1 -1
  35. data/lib/wovnrb.rb +5 -2
  36. data/test/lib/api_translator_test.rb +49 -2
  37. data/test/lib/headers_test.rb +659 -6223
  38. data/test/lib/lang_test.rb +0 -265
  39. data/test/lib/services/html_converter_test.rb +219 -40
  40. data/test/lib/services/url_test.rb +308 -0
  41. data/test/lib/store_test.rb +1 -1
  42. data/test/lib/url_language_switcher_test.rb +946 -0
  43. data/test/lib/wovnrb_test.rb +15 -3
  44. data/test/test_helper.rb +15 -4
  45. data/wovnrb.gemspec +0 -1
  46. metadata +25 -20
@@ -0,0 +1,45 @@
1
+ #!/usr/bin/env bash
2
+ set -eux
3
+ export AWS_PROFILE="wovn-code-staging-pipeline"
4
+ export AWS_REGION="us-west-2"
5
+ export ECR_HOST="257024234524.dkr.ecr.us-west-2.amazonaws.com"
6
+ REPO_NAME_WOVNRB="wovnrb"
7
+ REPO_NAME_NGINX="wovnrb-nginx"
8
+ CLUSTER_NAME="wovn-library-testing"
9
+ TASKDEF_FAMILY_NAME="wovnrb"
10
+ ECS_SERVICE_NAME="wovnrb"
11
+ PROJECT_DIR=$(dirname "$0")/../../..
12
+
13
+ commit_hash=$(git rev-parse --short HEAD)
14
+ image_tag="${commit_hash}"
15
+
16
+ sh ${PROJECT_DIR}/build.sh "${REPO_NAME_WOVNRB}":"${image_tag}"
17
+ sh ${PROJECT_DIR}/docker/nginx/build.sh "${REPO_NAME_NGINX}":"${image_tag}"
18
+
19
+ source ${PROJECT_DIR}/docker/scripts/jenkins/tag_and_push_image.sh
20
+
21
+ set +x
22
+ $(aws ecr get-login --no-include-email --region "${AWS_REGION}" --profile "${AWS_PROFILE}")
23
+ set -x
24
+
25
+ tag_and_push_image "${AWS_REGION}" "${REPO_NAME_WOVNRB}" "${image_tag}" "staging"
26
+ tag_and_push_image "${AWS_REGION}" "${REPO_NAME_NGINX}" "${image_tag}" "staging"
27
+
28
+ sed -i "s#wovnrb:latest#"${REPO_NAME_WOVNRB}":"${image_tag}"#g" ${PROJECT_DIR}/docker/scripts/jenkins/taskdef.json
29
+ sed -i "s#wovnrb-nginx:latest#"${REPO_NAME_NGINX}":"${image_tag}"#g" ${PROJECT_DIR}/docker/scripts/jenkins/taskdef.json
30
+
31
+ cd ${PROJECT_DIR}/docker/scripts/jenkins/
32
+ TASKDEF_REVISION=$(aws ecs register-task-definition \
33
+ --profile "${AWS_PROFILE}" --region "${AWS_REGION}" \
34
+ --cli-input-json file://$(pwd)/taskdef.json \
35
+ | jq ."taskDefinition.revision")
36
+ echo "${TASKDEF_REVISION}"
37
+
38
+ echo "Start ECS Rolling deploy. Update ${ECS_SERVICE_NAME} by ${TASKDEF_FAMILY_NAME}:${TASKDEF_REVISION}"
39
+ aws ecs update-service \
40
+ --profile "${AWS_PROFILE}" --region "${AWS_REGION}" \
41
+ --cluster "${CLUSTER_NAME}" \
42
+ --service "${ECS_SERVICE_NAME}" \
43
+ --task-definition "${TASKDEF_FAMILY_NAME}:${TASKDEF_REVISION}"
44
+
45
+ cd -
@@ -0,0 +1,30 @@
1
+ #!/usr/bin/env bash
2
+
3
+ # Tag a docker image and push it to the specified ECR registry
4
+ #
5
+ # Required environment variables:
6
+ # - AWS_PROFILE (AWS profile authorized for ECR login and S3 release)
7
+ #
8
+ # Args:
9
+ # - region: ECR region (e.g. us-west-1 | us-west-2 )
10
+ # - image-name: docker image name (e.g. equalizer-nginx)
11
+ # - tag: docker image tag; ordinarily the short git commit hash (git rev-parse --short HEAD)
12
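+ # - account: target AWS account, "production" or "staging" (selects which ECR host the image is pushed to)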
13
+ tag_and_push_image() {
14
+ local aws_region="${1}"
15
+ local image_name="${2}"
16
+ local tag="${3}"
17
+ local account="${4}"
18
+
19
+ if [ ${account} == "production" ]; then
20
+ local ecr_host="140249473629.dkr.ecr.${aws_region}.amazonaws.com"
21
+ elif [ ${account} == "staging" ]; then
22
+ local ecr_host="257024234524.dkr.ecr.${aws_region}.amazonaws.com"
23
+ else
24
+ echo "Passed account not recognized"
25
+ local ecr_host=""
26
+ fi
27
+
28
+ docker tag ${image_name}:${tag} ${ecr_host}/${image_name}:${tag}
29
+ docker push ${ecr_host}/${image_name}:${tag}
30
+ }
@@ -0,0 +1,104 @@
1
+ {
2
+ "executionRoleArn": "arn:aws:iam::257024234524:role/ecsTaskExecutionRole",
3
+ "containerDefinitions": [
4
+ {
5
+ "name": "wovnrb-nginx",
6
+ "cpu": 0,
7
+ "image": "257024234524.dkr.ecr.us-west-2.amazonaws.com/wovnrb-nginx:latest",
8
+ "mountPoints": [],
9
+ "ulimits": [
10
+ {
11
+ "name": "nofile",
12
+ "softLimit": 65535,
13
+ "hardLimit": 65535
14
+ }
15
+ ],
16
+ "logConfiguration": {
17
+ "logDriver": "awslogs",
18
+ "options": {
19
+ "awslogs-group": "/ecs/wovnrb",
20
+ "awslogs-region": "us-west-2",
21
+ "awslogs-stream-prefix": "ecs"
22
+ }
23
+ },
24
+ "essential": true,
25
+ "portMappings": [
26
+ {
27
+ "hostPort": 80,
28
+ "protocol": "tcp",
29
+ "containerPort": 80
30
+ },
31
+ {
32
+ "hostPort": 40022,
33
+ "protocol": "tcp",
34
+ "containerPort": 40022
35
+ }
36
+ ],
37
+ "stopTimeout": 120,
38
+ "dependsOn": [
39
+ {
40
+ "containerName": "wovnrb",
41
+ "condition": "HEALTHY"
42
+ }
43
+ ]
44
+ },
45
+ {
46
+ "name": "wovnrb",
47
+ "cpu": 0,
48
+ "image": "257024234524.dkr.ecr.us-west-2.amazonaws.com/wovnrb:latest",
49
+ "mountPoints": [],
50
+ "workingDirectory": "/usr/src/app",
51
+ "linuxParameters": {
52
+ "initProcessEnabled": true
53
+ },
54
+ "environment": [],
55
+ "ulimits": [
56
+ {
57
+ "name": "nofile",
58
+ "softLimit": 65535,
59
+ "hardLimit": 65535
60
+ }
61
+ ],
62
+ "logConfiguration": {
63
+ "logDriver": "awslogs",
64
+ "options": {
65
+ "awslogs-group": "/ecs/wovnrb",
66
+ "awslogs-region": "us-west-2",
67
+ "awslogs-stream-prefix": "ecs"
68
+ }
69
+ },
70
+ "portMappings": [
71
+ {
72
+ "hostPort": 4000,
73
+ "protocol": "tcp",
74
+ "containerPort": 4000
75
+ },
76
+ {
77
+ "hostPort": 22,
78
+ "protocol": "tcp",
79
+ "containerPort": 22
80
+ }
81
+ ],
82
+ "volumesFrom": [],
83
+ "stopTimeout": 120,
84
+ "healthCheck": {
85
+ "retries": 3,
86
+ "command": [
87
+ "CMD-SHELL",
88
+ "curl -f http://localhost:4000/ || exit 1"
89
+ ],
90
+ "timeout": 20,
91
+ "interval": 60,
92
+ "startPeriod": 180
93
+ },
94
+ "essential": true
95
+ }
96
+ ],
97
+ "placementConstraints": [],
98
+ "cpu": "1024",
99
+ "memory": "2048",
100
+ "requiresCompatibilities": [ "FARGATE" ],
101
+ "family": "wovnrb",
102
+ "networkMode": "awsvpc",
103
+ "volumes": []
104
+ }
@@ -0,0 +1,99 @@
1
+ {
2
+ "executionRoleArn": "arn:aws:iam::257024234524:role/ecsTaskExecutionRole",
3
+ "containerDefinitions": [
4
+ {
5
+ "name": "wovnrb-nginx",
6
+ "cpu": 0,
7
+ "image": "257024234524.dkr.ecr.us-west-2.amazonaws.com/wovnrb-nginx:latest",
8
+ "mountPoints": [],
9
+ "ulimits": [
10
+ {
11
+ "name": "nofile",
12
+ "softLimit": 65535,
13
+ "hardLimit": 65535
14
+ }
15
+ ],
16
+ "logConfiguration": {
17
+ "logDriver": "awslogs",
18
+ "options": {
19
+ "awslogs-group": "/ecs/wovnrb",
20
+ "awslogs-region": "us-west-2",
21
+ "awslogs-stream-prefix": "ecs"
22
+ }
23
+ },
24
+ "essential": true,
25
+ "portMappings": [
26
+ {
27
+ "hostPort": 80,
28
+ "protocol": "tcp",
29
+ "containerPort": 80
30
+ },
31
+ {
32
+ "hostPort": 40022,
33
+ "protocol": "tcp",
34
+ "containerPort": 40022
35
+ }
36
+ ],
37
+ "stopTimeout": 120,
38
+ "dependsOn": [
39
+ {
40
+ "containerName": "wovnrb",
41
+ "condition": "HEALTHY"
42
+ }
43
+ ]
44
+ },
45
+ {
46
+ "name": "wovnrb",
47
+ "cpu": 0,
48
+ "image": "257024234524.dkr.ecr.us-west-2.amazonaws.com/wovnrb:36d9648",
49
+ "mountPoints": [],
50
+ "workingDirectory": "/usr/src/app",
51
+ "linuxParameters": {
52
+ "initProcessEnabled": true
53
+ },
54
+ "environment": [],
55
+ "ulimits": [
56
+ {
57
+ "name": "nofile",
58
+ "softLimit": 65535,
59
+ "hardLimit": 65535
60
+ }
61
+ ],
62
+ "logConfiguration": {
63
+ "logDriver": "awslogs",
64
+ "options": {
65
+ "awslogs-group": "/ecs/wovnrb",
66
+ "awslogs-region": "us-west-2",
67
+ "awslogs-stream-prefix": "ecs"
68
+ }
69
+ },
70
+ "portMappings": [
71
+ {
72
+ "hostPort": 4000,
73
+ "protocol": "tcp",
74
+ "containerPort": 4000
75
+ }
76
+ ],
77
+ "volumesFrom": [],
78
+ "stopTimeout": 120,
79
+ "healthCheck": {
80
+ "retries": 3,
81
+ "command": [
82
+ "CMD-SHELL",
83
+ "curl -f http://localhost:4000/ || exit 1"
84
+ ],
85
+ "timeout": 20,
86
+ "interval": 60,
87
+ "startPeriod": 180
88
+ },
89
+ "essential": true
90
+ }
91
+ ],
92
+ "placementConstraints": [],
93
+ "cpu": "1024",
94
+ "memory": "2048",
95
+ "requiresCompatibilities": [ "FARGATE" ],
96
+ "family": "wovnrb",
97
+ "networkMode": "awsvpc",
98
+ "volumes": []
99
+ }
@@ -111,6 +111,7 @@ module Wovnrb
111
111
  'lang_code' => lang_code,
112
112
  'url_pattern' => url_pattern,
113
113
  'lang_param_name' => lang_param_name,
114
+ 'translate_canonical_tag' => translate_canonical_tag,
114
115
  'product' => 'WOVN.rb',
115
116
  'version' => VERSION,
116
117
  'body' => body
@@ -130,7 +131,7 @@ module Wovnrb
130
131
  end
131
132
 
132
133
  def api_timeout
133
- @store.settings['api_timeout_seconds']
134
+ @headers.search_engine_bot? ? @store.settings['api_timeout_search_engine_bots'] : @store.settings['api_timeout_seconds']
134
135
  end
135
136
 
136
137
  def settings_hash
@@ -157,6 +158,10 @@ module Wovnrb
157
158
  @store.settings['custom_lang_aliases']
158
159
  end
159
160
 
161
+ def translate_canonical_tag
162
+ @store.settings['translate_canonical_tag']
163
+ end
164
+
160
165
  def page_url
161
166
  "#{@headers.protocol}://#{@headers.url}"
162
167
  end
@@ -5,9 +5,9 @@ module Wovnrb
5
5
  # Generates a new instance of Wovnrb::Headers.
6
6
  # Its attributes are set by parsing the env variable.
7
7
 
8
- def initialize(env, settings)
8
+ def initialize(env, settings, url_lang_switcher)
9
9
  request = Rack::Request.new(env)
10
-
10
+ @url_lang_switcher = url_lang_switcher
11
11
  @env = env
12
12
  @settings = settings
13
13
  @protocol = request.scheme
@@ -31,11 +31,11 @@ module Wovnrb
31
31
  else
32
32
  @env['HTTP_HOST']
33
33
  end
34
- @host = settings['url_pattern'] == 'subdomain' ? remove_lang(@host, lang_code) : @host
34
+ @host = settings['url_pattern'] == 'subdomain' ? @url_lang_switcher.remove_lang_from_uri_component(@host, lang_code) : @host
35
35
  @pathname, @query = @env['REQUEST_URI'].split('?')
36
- @pathname = settings['url_pattern'] == 'path' ? remove_lang(@pathname, lang_code) : @pathname
36
+ @pathname = settings['url_pattern'] == 'path' ? @url_lang_switcher.remove_lang_from_uri_component(@pathname, lang_code) : @pathname
37
37
  @query ||= ''
38
- @url = "#{@host}#{@pathname}#{(@query.empty? ? '' : '?') + remove_lang(@query, lang_code)}"
38
+ @url = "#{@host}#{@pathname}#{(@query.empty? ? '' : '?') + @url_lang_switcher.remove_lang_from_uri_component(@query, lang_code)}"
39
39
  if settings['query'].empty?
40
40
  @query = ''
41
41
  else
@@ -51,11 +51,15 @@ module Wovnrb
51
51
  "?#{query_vals.sort.join('&')}"
52
52
  end
53
53
  end
54
- @query = remove_lang(@query, lang_code)
54
+ @query = @url_lang_switcher.remove_lang_from_uri_component(@query, lang_code)
55
55
  @pathname_with_trailing_slash_if_present = @pathname
56
56
  @pathname = @pathname.gsub(/\/$/, '')
57
57
  end
58
58
 
59
+ def url_with_scheme
60
+ "#{@protocol}://#{@url}"
61
+ end
62
+
59
63
  def unmasked_pathname_without_trailing_slash
60
64
  @unmasked_pathname.chomp('/')
61
65
  end
@@ -99,81 +103,38 @@ module Wovnrb
99
103
  def redirect_location(lang)
100
104
  if lang == @settings['default_lang']
101
105
  # IS THIS RIGHT??
102
- "#{protocol}://#{url}"
103
- # return remove_lang("#{@env['HTTP_HOST']}#{@env['REQUEST_URI']}", lang)
104
- else
105
- # TODO test
106
- lang_code = Store.instance.settings['custom_lang_aliases'][lang] || lang
107
- location = url
108
- case @settings['url_pattern']
109
- when 'query'
110
- lang_param_name = @settings['lang_param_name']
111
- location = if /\?/.match?(location)
112
- "#{location}&#{lang_param_name}=#{lang_code}"
113
- else
114
- "#{location}?#{lang_param_name}=#{lang_code}"
115
- end
116
- when 'subdomain'
117
- location = "#{lang_code.downcase}.#{location}"
118
- # when 'path'
119
- else
120
- location = location.sub(/(\/|$)/, "/#{lang_code}/")
121
- end
122
- "#{protocol}://#{location}"
106
+ return url_with_scheme
123
107
  end
108
+
109
+ @url_lang_switcher.add_lang_code(url_with_scheme, lang, self)
124
110
  end
125
111
 
126
112
  def request_out(_def_lang = @settings['default_lang'])
127
113
  @env['wovnrb.target_lang'] = lang_code
128
114
  case @settings['url_pattern']
129
115
  when 'query'
130
- @env['REQUEST_URI'] = remove_lang(@env['REQUEST_URI']) if @env.key?('REQUEST_URI')
131
- @env['QUERY_STRING'] = remove_lang(@env['QUERY_STRING']) if @env.key?('QUERY_STRING')
132
- @env['ORIGINAL_FULLPATH'] = remove_lang(@env['ORIGINAL_FULLPATH']) if @env.key?('ORIGINAL_FULLPATH')
116
+ @env['REQUEST_URI'] = @url_lang_switcher.remove_lang_from_uri_component(@env['REQUEST_URI'], lang_code) if @env.key?('REQUEST_URI')
117
+ @env['QUERY_STRING'] = @url_lang_switcher.remove_lang_from_uri_component(@env['QUERY_STRING'], lang_code) if @env.key?('QUERY_STRING')
118
+ @env['ORIGINAL_FULLPATH'] = @url_lang_switcher.remove_lang_from_uri_component(@env['ORIGINAL_FULLPATH'], lang_code) if @env.key?('ORIGINAL_FULLPATH')
133
119
  when 'subdomain'
134
120
  if @settings['use_proxy'] && @env.key?('HTTP_X_FORWARDED_HOST')
135
- @env['HTTP_X_FORWARDED_HOST'] = remove_lang(@env['HTTP_X_FORWARDED_HOST'])
121
+ @env['HTTP_X_FORWARDED_HOST'] = @url_lang_switcher.remove_lang_from_uri_component(@env['HTTP_X_FORWARDED_HOST'], lang_code)
136
122
  else
137
- @env['HTTP_HOST'] = remove_lang(@env['HTTP_HOST'])
138
- @env['SERVER_NAME'] = remove_lang(@env['SERVER_NAME'])
123
+ @env['HTTP_HOST'] = @url_lang_switcher.remove_lang_from_uri_component(@env['HTTP_HOST'], lang_code)
124
+ @env['SERVER_NAME'] = @url_lang_switcher.remove_lang_from_uri_component(@env['SERVER_NAME'], lang_code)
139
125
  end
140
- @env['HTTP_REFERER'] = remove_lang(@env['HTTP_REFERER']) if @env.key?('HTTP_REFERER')
126
+ @env['HTTP_REFERER'] = @url_lang_switcher.remove_lang_from_uri_component(@env['HTTP_REFERER'], lang_code) if @env.key?('HTTP_REFERER')
141
127
  # when 'path'
142
128
  else
143
- @env['REQUEST_URI'] = remove_lang(@env['REQUEST_URI'])
144
- @env['REQUEST_PATH'] = remove_lang(@env['REQUEST_PATH']) if @env.key?('REQUEST_PATH')
145
- @env['PATH_INFO'] = remove_lang(@env['PATH_INFO'])
146
- @env['ORIGINAL_FULLPATH'] = remove_lang(@env['ORIGINAL_FULLPATH']) if @env.key?('ORIGINAL_FULLPATH')
147
- @env['HTTP_REFERER'] = remove_lang(@env['HTTP_REFERER']) if @env.key?('HTTP_REFERER')
129
+ @env['REQUEST_URI'] = @url_lang_switcher.remove_lang_from_uri_component(@env['REQUEST_URI'], lang_code)
130
+ @env['REQUEST_PATH'] = @url_lang_switcher.remove_lang_from_uri_component(@env['REQUEST_PATH'], lang_code) if @env.key?('REQUEST_PATH')
131
+ @env['PATH_INFO'] = @url_lang_switcher.remove_lang_from_uri_component(@env['PATH_INFO'], lang_code)
132
+ @env['ORIGINAL_FULLPATH'] = @url_lang_switcher.remove_lang_from_uri_component(@env['ORIGINAL_FULLPATH'], lang_code) if @env.key?('ORIGINAL_FULLPATH')
133
+ @env['HTTP_REFERER'] = @url_lang_switcher.remove_lang_from_uri_component(@env['HTTP_REFERER'], lang_code) if @env.key?('HTTP_REFERER')
148
134
  end
149
135
  @env
150
136
  end
151
137
 
152
- # TODO: this should be in Lang for reusability
153
- # Remove language code from the URI.
154
- #
155
- # @param uri [String] original URI
156
- # @param lang_code [String] language code
157
- # @return [String] removed URI
158
- def remove_lang(uri, lang = path_lang)
159
- lang_code = Store.instance.settings['custom_lang_aliases'][lang] || lang
160
-
161
- # Do nothing if lang is empty.
162
- return uri if lang_code.nil? || lang_code.empty?
163
-
164
- case @settings['url_pattern']
165
- when 'query'
166
- lang_param_name = @settings['lang_param_name']
167
- uri.sub(/(^|\?|&)#{lang_param_name}=#{lang_code}(&|$)/, '\1').gsub(/(\?|&)$/, '')
168
- when 'subdomain'
169
- rp = Regexp.new("(^|(//))#{lang_code}\\.", 'i')
170
- uri.sub(rp, '\1')
171
- # when 'path'
172
- else
173
- uri.sub(/\/#{lang_code}(\/|$)/, '/')
174
- end
175
- end
176
-
177
138
  def out(headers)
178
139
  r = Regexp.new("//#{@host}")
179
140
  lang_code = Store.instance.settings['custom_lang_aliases'][self.lang_code] || self.lang_code
@@ -197,11 +158,24 @@ module Wovnrb
197
158
  end
198
159
 
199
160
  def dirname
200
- if pathname.include?('/')
201
- pathname.end_with?('/') ? pathname : pathname[0, pathname.rindex('/') + 1]
161
+ if pathname_with_trailing_slash_if_present.include?('/')
162
+ pathname_with_trailing_slash_if_present.end_with?('/') ? pathname_with_trailing_slash_if_present : pathname_with_trailing_slash_if_present[0, pathname_with_trailing_slash_if_present.rindex('/') + 1]
202
163
  else
203
164
  '/'
204
165
  end
205
166
  end
167
+
168
+ def search_engine_bot?
169
+ return false unless @env.key?('HTTP_USER_AGENT')
170
+
171
+ bots = %w[Googlebot/ bingbot/ YandexBot/ YandexWebmaster/ DuckDuckBot-Https/ Baiduspider/ Slurp Yahoo]
172
+ bots.any? { |bot| @env['HTTP_USER_AGENT'].include?(bot) }
173
+ end
174
+
175
+ def to_absolute_path(path)
176
+ absolute_path = path.blank? ? '/' : path
177
+ absolute_path = absolute_path.starts_with?('/') ? absolute_path : URL.join_paths(dirname, absolute_path)
178
+ URL.normalize_path_slash(path, absolute_path)
179
+ end
206
180
  end
207
181
  end
@@ -1,9 +1,10 @@
1
1
  module Wovnrb
2
2
  class HtmlConverter
3
- def initialize(dom, store, headers)
3
+ def initialize(dom, store, headers, url_lang_switcher)
4
4
  @dom = dom
5
5
  @headers = headers
6
6
  @store = store
7
+ @url_lang_switcher = url_lang_switcher
7
8
  end
8
9
 
9
10
  def build
@@ -32,6 +33,7 @@ module Wovnrb
32
33
  replace_snippet
33
34
  replace_hreflangs
34
35
  inject_lang_html_tag
36
+ translate_canonical_tag if @store.settings['translate_canonical_tag']
35
37
  end
36
38
 
37
39
  def replace_snippet
@@ -48,6 +50,7 @@ module Wovnrb
48
50
  insert_snippet(adds_backend_error_mark: true)
49
51
  insert_hreflang_tags
50
52
  inject_lang_html_tag
53
+ translate_canonical_tag if @store.settings['translate_canonical_tag']
51
54
 
52
55
  html
53
56
  end
@@ -143,6 +146,19 @@ module Wovnrb
143
146
  end
144
147
  end
145
148
 
149
+ def translate_canonical_tag
150
+ canonical_node = @dom.at_css('link[rel="canonical"]')
151
+ return unless canonical_node
152
+
153
+ lang_code = @headers.lang_code
154
+ return if lang_code == @store.settings['default_lang'] && @store.settings['custom_lang_aliases'][lang_code].nil?
155
+
156
+ canonical_url = canonical_node['href']
157
+
158
+ translated_canonical_url = @url_lang_switcher.add_lang_code(canonical_url, lang_code, @headers)
159
+ canonical_node['href'] = translated_canonical_url
160
+ end
161
+
146
162
  # Remove wovn snippet code from dom
147
163
  def strip_snippet
148
164
  @dom.xpath('//script').each do |script_node|
@@ -0,0 +1,136 @@
1
+ module Wovnrb
2
+ # URL utility ported from html-swapper
3
+ class URL
4
+ module FileExtension
5
+ IMG_FILES = 'jpe|jpe?g|bmp|gif|png|btif|tiff?|psd|djvu?|xif|wbmp|webp|p(n|b|g|p)m|rgb|tga|x(b|p)m|xwd|pic|ico|fh(c|4|5|7)?|xif|f(bs|px|st)'.freeze
6
+ AUDIO_FILES = 'mp(3|2)|m(p?2|3|p?4|pg)a|midi?|kar|rmi|web(m|a)|aif(f?|c)|w(ma|av|ax)|m(ka|3u)|sil|s3m|og(a|g)|uvv?a'.freeze
7
+ VIDEO_FILES = 'm(x|4)u|fl(i|v)|3g(p|2)|jp(gv|g?m)|mp(4v?|g4|(?!$)e?g?)|m(1|2)v|ogv|m(ov|ng)|qt|uvv?(h|m|p|s|v)|dvb|mk(v|3d|s)|f4v|as(x|f)|w(m(v|x)|vx)|xvid'.freeze
8
+ DOC_FILES = '(7|g)?zip|tar|rar|7z|gz|ez|aw|atom(cat|svc)?|(cc)?xa?ml|cdmi(a|c|d|o|q)?|epub|g(ml|px|xf)|jar|js|ser|class|json(ml)?|do(c|t)(m|x)?|xls(m|x)?|xps|pp(a|tx?|s)m?|potm?|sldm|mp(p|t)|bin|dms|lrf|mar|so|dist|distz|m?pkg|bpk|dump|rtf|tfi|pdf|pgp|apk|o(t|d)(b|c|ft?|g|h|i|p|s|t)'.freeze
9
+ end
10
+
11
+ # TODO: Maybe this should be applied to all get_attribute calls rather than just href
12
+ def self.normalize_url(href)
13
+ return nil unless href
14
+
15
+ href.delete("\u200b").strip
16
+ end
17
+
18
+ def self.absolute_url?(href)
19
+ href =~ %r{^(https?:)?//}i
20
+ end
21
+
22
+ def self.absolute_path?(href)
23
+ href.match?(%r{^/})
24
+ end
25
+
26
+ def self.relative_path?(href)
27
+ !absolute_url?(href) && !absolute_path?(href)
28
+ end
29
+
30
+ # @param parsed_uri [Addressable::URI]
31
+ def self.path_and_query(parsed_uri)
32
+ parsed_uri.path + (parsed_uri.query ? "?#{parsed_uri.query}" : '')
33
+ end
34
+
35
+ def self.path_and_query_and_hash(parsed_uri)
36
+ uri = parsed_uri.path
37
+ uri += "?#{parsed_uri.query}" if parsed_uri.query
38
+ uri += "##{parsed_uri.fragment}" if parsed_uri.fragment
39
+ uri
40
+ end
41
+
42
+ def self.host_with_port(parsed_uri)
43
+ if parsed_uri.port
44
+ "#{parsed_uri.host}:#{parsed_uri.port}"
45
+ else
46
+ parsed_uri.host.to_s
47
+ end
48
+ end
49
+
50
+ def self.resolve_absolute_uri(base_url, href)
51
+ # This resolves ./../ and also handles href already being absolute
52
+ Addressable::URI.join(base_url, href)
53
+ rescue Addressable::URI::InvalidURIError, ArgumentError => e
54
+ Rollbar.warning('Failed to resolve absolute URI', original_error: e, base_url: base_url, href: href)
55
+ raise
56
+ end
57
+
58
+ def self.resolve_absolute_path(base_url, href)
59
+ normalized_uri = resolve_absolute_uri(base_url, href)
60
+ path = normalized_uri.path
61
+ query = normalized_uri.query ? "?#{normalized_uri.query}" : ''
62
+ fragment = normalized_uri.fragment ? "##{normalized_uri.fragment}" : ''
63
+
64
+ path + query + fragment
65
+ end
66
+
67
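+ # Insert dir (e.g. the path lang) as a new path segment right after the last "/"-delimited segment of url that contains a dot (normally the host)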
68
+ def self.prepend_path(url, dir)
69
+ url.sub(%r{(.+\.[^/]+)(/|$)}, "\\1/#{dir}\\2")
70
+ end
71
+
72
+ def self.trim_slashes(path)
73
+ path.gsub(%r{^/|/$}, '')
74
+ end
75
+
76
+ def self.prepend_path_slash(path)
77
+ path ||= ''
78
+ return path if path.starts_with?('/')
79
+
80
+ "/#{path}"
81
+ end
82
+
83
+ def self.join_paths(*paths)
84
+ paths.inject('') do |left, right|
85
+ case [left.end_with?('/'), right.start_with?('/')]
86
+ when [true, true]
87
+ left + right[1..-1]
88
+ when [false, false]
89
+ left + (right.blank? ? right : "/#{right}")
90
+ else
91
+ left + right
92
+ end
93
+ end
94
+ end
95
+
96
+ # @param uri [Addressable::URI]
97
+ # @param new_protocol [String | nil]
98
+ # @return [Addressable::URI] copy of uri with its scheme set to new_protocol
99
+ def self.change_protocol(uri, new_protocol)
100
+ result = uri.dup
101
+ result.scheme = new_protocol
102
+ result
103
+ end
104
+
105
+ def self.valid_protocol?(href)
106
+ scheme_matches = /^\s*(?<scheme>[a-zA-Z]+):/.match(href)
107
+ scheme = scheme_matches ? scheme_matches[:scheme] : nil
108
+
109
+ scheme.nil? || %w[http https].include?(scheme)
110
+ end
111
+
112
+ def self.file?(href_with_query_and_hash)
113
+ href = remove_query_and_hash(href_with_query_and_hash)
114
+ img_files = %r{^(https?://)?.*(\.(#{FileExtension::IMG_FILES}))((\?|#).*)?$}io
115
+ audio_files = %r{^(https?://)?.*(\.(#{FileExtension::AUDIO_FILES}))((\?|#).*)?$}io
116
+ video_files = %r{^(https?://)?.*(\.(#{FileExtension::VIDEO_FILES}))((\?|#).*)?$}io
117
+ doc_files = %r{^(https?://)?.*(\.(#{FileExtension::DOC_FILES}))((\?|#).*)?$}io
118
+ href.match?(img_files) || href.match?(audio_files) || href.match?(video_files) || href.match?(doc_files)
119
+ end
120
+
121
+ def self.remove_query_and_hash(href)
122
+ href.gsub(/[#?].*/, '')
123
+ end
124
+
125
+ # if original path does not end in slash, remove it from new path
126
+ # if original path ends in slash, add it to new path
127
+ def self.normalize_path_slash(original_path, new_path)
128
+ if !original_path.end_with?('/') && new_path.end_with?('/')
129
+ new_path = new_path.chomp('/')
130
+ elsif original_path.end_with?('/') && !new_path.end_with?('/')
131
+ new_path += '/'
132
+ end
133
+ new_path
134
+ end
135
+ end
136
+ end