tx_asr 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 86d453ba155a1fc1936f8899d3f29f9a7e7fc42b5bfcaf52069274d0e17313c5
4
+ data.tar.gz: 2dbbba1c8682554f4657767fb754c2e3f5f05c71f97c61458f0c5f1c57dfc504
5
+ SHA512:
6
+ metadata.gz: 965433e364a45ed2124f39d6213223fefb56b68e3c5113b17cf594a880b2d6a9f5eeac944e94ba69c43c3e60a6475b23d71e135e97d2bdbdb622504905ee9839
7
+ data.tar.gz: abbde1b3bdc48c18e1b047370d83ba2fcd1d6c5fdd17bf32e75e4fb4589dc65c34d9ffdadae876c1a6fcc3f17dc3baaf5f31233b920fec9f07bc37df1314b187
data/MIT-LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright 2021 ian
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,40 @@
1
+ # TxAsr
2
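+ A Rails gem for calling Tencent Cloud ASR (Automatic Speech Recognition). It currently supports one-sentence recognition of an audio file given by URL.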
3
+
4
+ ## Usage
5
+ ```ruby
6
+
7
+ # sentence recognition
8
+ result = TxAsr::SentenceRecognition.from(audio_url)
9
+
10
+ if result.success?
11
+ p result.data
12
+ end
13
+ ```
14
+
15
+ ## Installation
16
+ 1. 在 Gemfile 里边添加如下代码
17
+
18
+ ```ruby
19
+ gem 'tx_asr'
20
+ ```
21
+
22
+ 2. 执行 bundle
23
+
24
+ ```bash
25
+ $ bundle
26
+ ```
27
+
28
+ 3. 执行安装命令
29
+ ```bash
30
+ $ rails g tx_asr:install
31
+ ```
32
+
33
+ 4. 修改 `config/initializers/tx_asr.rb` 中 `secret_id` 和 `secret_key` 为实际的值
34
+
35
+
36
+ ## Contributing
37
+ Bug reports and pull requests are welcome on GitHub at https://github.com/bkyz/tx_asr.
38
+
39
+ ## License
40
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
data/Rakefile ADDED
@@ -0,0 +1,13 @@
1
+ require "bundler/setup"
2
+
3
+ require "bundler/gem_tasks"
4
+
5
+ require "rake/testtask"
6
+
7
+ Rake::TestTask.new(:test) do |t|
8
+ t.libs << 'test'
9
+ t.pattern = 'test/**/*_test.rb'
10
+ t.verbose = false
11
+ end
12
+
13
+ task default: :test
@@ -0,0 +1,8 @@
1
+ Description:
2
+ Copies the TxAsr initializer template into your Rails application.
3
+
4
+ Example:
5
+ bin/rails generate tx_asr:install
6
+
7
+ This will create:
8
+ config/initializers/tx_asr.rb
@@ -0,0 +1,7 @@
1
+ class TxAsr::InstallGenerator < Rails::Generators::Base # installs the TxAsr initializer into an application
2
+ source_root File.expand_path('templates', __dir__) # templates are read from the `templates` directory beside this file
3
+ # Copies the `initializer.rb` template to `config/initializers/tx_asr.rb`.
4
+ def install
5
+ copy_file "initializer.rb", "config/initializers/tx_asr.rb"
6
+ end
7
+ end
@@ -0,0 +1,28 @@
1
+
2
+ TxAsr.setup do |config|
3
+
4
+ # TODO: replace secret_key and secret_id with the values used by your actual project
5
+ config.secret_key = "your_secret_key"
6
+ config.secret_id = "your_secret_id"
7
+ config.region = "ap-guangzhou"
8
+
9
+
10
+ # Audio format: mp3 and wav are supported
11
+ config.voice_format = "mp3"
12
+
13
+ # Can be looked up in the Tencent Cloud console. If you have not created a project, the default project is used; its id is 0
14
+ config.project_id = 0
15
+
16
+ # API version; as of this gem's release, the value below is the one given in the documentation
17
+ config.version = "2019-06-14"
18
+
19
+ # Source type (where the audio comes from): 0 = url, 1 = post body
20
+ config.source_type = 0
21
+
22
+ # Filter modal particles: 0 = no filtering, 1 = partial filtering, 2 = strict filtering
23
+ config.filter_model = 1
24
+
25
+ # Number of channels: 1 = mono, 2 = stereo; defaults to mono
26
+ # config.channel_num = 1
27
+
28
+ end
@@ -0,0 +1,4 @@
1
+ # desc "Explaining what the task does"
2
+ # task :tx_asr do
3
+ # # Task goes here
4
+ # end
@@ -0,0 +1,4 @@
1
+ module TxAsr
2
+ class Railtie < ::Rails::Railtie # empty Railtie subclass: declares no initializers, generators or rake tasks of its own
3
+ end
4
+ end
@@ -0,0 +1,83 @@
1
+
2
+ module TxAsr
3
+ class Request
4
+ def initialize(payload)
5
+ @payload = default_payload.merge(payload)
6
+ end
7
+
8
+ def call(action)
9
+ result = request(action)
10
+
11
+ if result["Response"]["Error"].present?
12
+ # {
13
+ # "Response":
14
+ # {
15
+ # "Error": {
16
+ # "Code": "AuthFailure.SignatureFailure",
17
+ # "Message": "The provided credentials could not be validated. Please check your signature is correct."
18
+ # },
19
+ # "RequestId":"ff48f384-6da7-4797-874a-b88e5e186136"
20
+ # }
21
+ # }
22
+ ServiceResult.new(errors: result["Response"]["Error"], message: result["Response"]["Error"].fetch("Message"))
23
+ else
24
+ # {
25
+ # "Response":
26
+ # {
27
+ # "RequestId": "6f24aeab-9929-4aec-81de-e3eff87639f6",
28
+ # "Data": {
29
+ # "TaskId":
30
+ # 1357048750
31
+ # }
32
+ # }
33
+ # }
34
+ ServiceResult.new(success: true, data: result["Response"])
35
+ end
36
+ end
37
+
38
+ private
39
+
40
+
41
+ def default_payload
42
+ {
43
+ SourceType: TxAsr.source_type || TxAsr::SOURCE_TYPE_URL,
44
+ }
45
+ end
46
+
47
+ def request(action)
48
+ authorization = Sign.new(@payload).authorization
49
+ uri = URI(TxAsr::ENDPOINT)
50
+ req = Net::HTTP::Post.new(uri)
51
+ req["Authorization"] = authorization
52
+ req["Content-Type"] = "application/json; charset=utf-8"
53
+ req["Host"] = TxAsr::API_HOST
54
+ req["X-TC-Action"] = action
55
+ req["X-TC-Timestamp"] = timestamp
56
+ req["X-TC-Version"] = TxAsr.version
57
+ req["X-TC-Region"] = TxAsr.region
58
+
59
+ resp = Net::HTTP.start(uri.host, uri.port, use_ssl: true) do |http|
60
+ http.request(req, @payload.to_json)
61
+ end
62
+
63
+ if resp.is_a? Net::HTTPSuccess
64
+ JSON.parse(resp.body).with_indifferent_access
65
+ else
66
+ logger.error <<-ERROR
67
+ #{resp.message}
68
+ uri: #{resp.uri}
69
+ code: #{resp.code}
70
+ #{req.each_header.inject(""){|headers, header| headers + header.join(": ") + "\n"}.rstrip}
71
+ body:
72
+ #{resp.body}
73
+ ERROR
74
+
75
+ raise Errors::RequestError.new(req, resp)
76
+ end
77
+ end
78
+
79
+ def timestamp
80
+ @timestamp ||= Time.current.to_i
81
+ end
82
+ end
83
+ end
@@ -0,0 +1,31 @@
1
+ module TxAsr
2
+ class SentenceRecognition
3
+ # 2 为 一句话识别
4
+ SUB_SERVICE_TYPE = 2
5
+
6
+ ACTION = "SentenceRecognition"
7
+
8
+ ENG_SERVICE_TYPE_8K_ZH = "8k_zh"
9
+ ENG_SERVICE_TYPE_16K_ZH = "16k_zh"
10
+
11
+ def self.from(url)
12
+ payload = common_payload.merge({ Url: url })
13
+
14
+ Request.new(payload).call(ACTION)
15
+ end
16
+
17
+ def self.common_payload
18
+ {
19
+ ProjectId: TxAsr.project_id || TxAsr::PROJECT_ID_DEFAULT,
20
+ SubServiceType: SUB_SERVICE_TYPE,
21
+ EngSerViceType: ENG_SERVICE_TYPE_8K_ZH, # 这里的键名是照着文档的,是正确的
22
+ VoiceFormat: TxAsr.voice_format,
23
+ UsrAudioKey: voice_id
24
+ }
25
+ end
26
+
27
+ def self.voice_id
28
+ SecureRandom.uuid
29
+ end
30
+ end
31
+ end
@@ -0,0 +1,46 @@
1
+ module TxAsr
2
+ class ServiceResult
3
+ attr_accessor :success,
4
+ :errors,
5
+ :data,
6
+ :message,
7
+ :message_type
8
+
9
+ def initialize(success: false,
10
+ errors: nil,
11
+ message: nil,
12
+ message_type: nil,
13
+ data: nil)
14
+ self.success = success
15
+ self.data = data
16
+ self.errors = errors.is_a?(Array) ? errors : [errors]
17
+ self.message = message
18
+ self.message_type = message_type
19
+ end
20
+
21
+ alias success? :success
22
+
23
+ def failure?
24
+ !success?
25
+ end
26
+
27
+ def on_success
28
+ yield(self) if success?
29
+ end
30
+
31
+ def on_failure
32
+ yield(self) if failure?
33
+ end
34
+
35
+ def get_message_type
36
+ if message_type.present?
37
+ message_type.to_sym
38
+ elsif success?
39
+ :notice
40
+ else
41
+ :error
42
+ end
43
+ end
44
+ end
45
+
46
+ end
@@ -0,0 +1,119 @@
1
+
2
+ require 'digest'
3
+ require 'json'
4
+ require 'net/http'
5
+ require 'time'
6
+ require 'openssl'
7
+
8
+ module TxAsr
9
+ class Sign
10
+ attr_accessor :file_url
11
+
12
+ SERVICE = 'asr'
13
+
14
+ HOST = "asr.tencentcloudapi.com"
15
+
16
+ # 加密算法
17
+ ALGORITHM = 'TC3-HMAC-SHA256'
18
+
19
+ # 场景模型
20
+ ENGINE_MODEL = "16k_zh_video"
21
+
22
+ # 声道数
23
+ CHANNEL_NUM = 1
24
+
25
+ # 结果回调地址
26
+ CALLBACK_URL = "https://demo.mini-geek.com/speech_to_text/callback"
27
+
28
+ # 声音文件来源: url
29
+ AUDIO_SOURCE_FROM_URL = 0
30
+
31
+ # 声音文件来源: 请求体
32
+ AUDIO_SOURCE_FROM_BODY = 1
33
+
34
+ # 过滤语气词: 部分
35
+ FILTER_INTERJECTION_PART = 1
36
+
37
+ # 过滤语气词: 所有
38
+ FILTER_INTERJECTION_ALL = 2
39
+
40
+ # 翻译的结果具体到单词时间,并返回语速
41
+ RESULT_FORMAT_WORDS_WITH_PUNC = 2
42
+
43
+ # def initialize(file_url)
44
+ # @file_url = file_url
45
+ # end
46
+
47
+ def initialize(payload)
48
+ @payload = payload
49
+ end
50
+
51
+ def authorization
52
+ signature = sign(@payload)
53
+
54
+ date = Time.at(timestamp).utc.strftime('%Y-%m-%d')
55
+
56
+ credential_scope = date + '/' + SERVICE + "/tc3_request"
57
+ signed_headers = 'content-type;host'
58
+ "#{ALGORITHM} Credential=#{TxAsr.secret_id}/#{credential_scope}, SignedHeaders=#{signed_headers}, Signature=#{signature}"
59
+ end
60
+
61
+ private
62
+
63
+ def logger
64
+ @logger ||= Logger.new Rails.root.join("log/speech_to_text.log")
65
+ end
66
+
67
+ def payload
68
+ {
69
+ EngineModelType: ENGINE_MODEL,
70
+ ChannelNum: CHANNEL_NUM,
71
+ ResTextFormat: RESULT_FORMAT_WORDS_WITH_PUNC,
72
+ SourceType: AUDIO_SOURCE_FROM_URL,
73
+ CallbackUrl: CALLBACK_URL,
74
+ Url: file_url
75
+ }
76
+ end
77
+
78
+ def sign(payload)
79
+ http_request_method = 'POST'
80
+ canonical_uri = '/'
81
+ canonical_querystring = ''
82
+ canonical_headers = "content-type:application/json; charset=utf-8\nhost:#{HOST}\n"
83
+ signed_headers = 'content-type;host'
84
+
85
+ hashed_request_payload = Digest::SHA256.hexdigest(payload.to_json)
86
+ canonical_request = [
87
+ http_request_method,
88
+ canonical_uri,
89
+ canonical_querystring,
90
+ canonical_headers,
91
+ signed_headers,
92
+ hashed_request_payload,
93
+ ].join("\n")
94
+
95
+ date = Time.at(timestamp).utc.strftime('%Y-%m-%d')
96
+
97
+ credential_scope = date + '/' + SERVICE + '/tc3_request'
98
+ hashed_request_payload = Digest::SHA256.hexdigest(canonical_request)
99
+ string_to_sign = [
100
+ ALGORITHM,
101
+ timestamp.to_s,
102
+ credential_scope,
103
+ hashed_request_payload,
104
+ ].join("\n")
105
+
106
+ digest = OpenSSL::Digest.new('sha256')
107
+ secret_date = OpenSSL::HMAC.digest(digest, 'TC3' + TxAsr.secret_key, date)
108
+ secret_service = OpenSSL::HMAC.digest(digest, secret_date, SERVICE)
109
+ secret_signing = OpenSSL::HMAC.digest(digest, secret_service, 'tc3_request')
110
+ OpenSSL::HMAC.hexdigest(digest, secret_signing, string_to_sign)
111
+ end
112
+
113
+ private
114
+
115
+ def timestamp
116
+ @timestamp ||= Time.current.to_i
117
+ end
118
+ end
119
+ end
@@ -0,0 +1,3 @@
1
+ module TxAsr
2
+ VERSION = '0.1.0' # version string of the tx_asr gem
3
+ end
data/lib/tx_asr.rb ADDED
@@ -0,0 +1,45 @@
1
+ require "tx_asr/version"
2
+ require "tx_asr/railtie"
3
+ require "tx_asr/service_result"
4
+ require "tx_asr/request"
5
+ require "tx_asr/sign"
6
+ require "tx_asr/sentence_recognition"
7
+
8
+ module TxAsr
9
+ # 声道
10
+ SINGLE_CHANNEL = 1
11
+ DOUBLE_CHANNEL = 2
12
+
13
+ # 语音来源
14
+ SOURCE_TYPE_URL = 0
15
+ SOURCE_TYPE_BODY = 1
16
+
17
+ ENDPOINT = "https://asr.tencentcloudapi.com"
18
+ API_HOST = "asr.tencentcloudapi.com"
19
+
20
+ # api 版本
21
+ API_VERSION = "2019-06-14"
22
+
23
+ # 音频格式
24
+ VOICE_FORMAT_MP3 = "mp3"
25
+ VOICE_FORMAT_WAV = "wav"
26
+
27
+ PROJECT_ID_DEFAULT = 0
28
+
29
+
30
+ # Your code goes here...
31
+ mattr_accessor :secret_id,
32
+ :secret_key,
33
+ :voice_format,
34
+ :project_id,
35
+ :version,
36
+ :source_type,
37
+ :api_host,
38
+ :channel_num,
39
+ :region,
40
+ :filter_model
41
+
42
+ def self.setup
43
+ yield self if block_given?
44
+ end
45
+ end
metadata ADDED
@@ -0,0 +1,80 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: tx_asr
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - ian
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2021-10-13 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rails
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: 6.1.4
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: 6.1.4.1
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - "~>"
28
+ - !ruby/object:Gem::Version
29
+ version: 6.1.4
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: 6.1.4.1
33
+ description: for development app with tencent cloud ASR(Automatic Speech Recognition)
34
+ email:
35
+ - ianlynxk@gmail.com
36
+ executables: []
37
+ extensions: []
38
+ extra_rdoc_files: []
39
+ files:
40
+ - MIT-LICENSE
41
+ - README.md
42
+ - Rakefile
43
+ - lib/generators/tx_asr/install/USAGE
44
+ - lib/generators/tx_asr/install/install_generator.rb
45
+ - lib/generators/tx_asr/install/templates/initializer.rb
46
+ - lib/tasks/tx_asr_tasks.rake
47
+ - lib/tx_asr.rb
48
+ - lib/tx_asr/railtie.rb
49
+ - lib/tx_asr/request.rb
50
+ - lib/tx_asr/sentence_recognition.rb
51
+ - lib/tx_asr/service_result.rb
52
+ - lib/tx_asr/sign.rb
53
+ - lib/tx_asr/version.rb
54
+ homepage: https://github.com/bkyz/tx_asr
55
+ licenses:
56
+ - MIT
57
+ metadata:
58
+ homepage_uri: https://github.com/bkyz/tx_asr
59
+ source_code_uri: https://github.com/bkyz/tx_asr
60
+ changelog_uri: https://github.com/bkyz/tx_asr/CHANGELOG.md
61
+ post_install_message:
62
+ rdoc_options: []
63
+ require_paths:
64
+ - lib
65
+ required_ruby_version: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ required_rubygems_version: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ version: '0'
75
+ requirements: []
76
+ rubygems_version: 3.2.15
77
+ signing_key:
78
+ specification_version: 4
79
+ summary: for development app with tencent cloud ASR(Automatic Speech Recognition)
80
+ test_files: []