siat 3.7.7__py3-none-any.whl → 3.7.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- siat/allin.py +3 -0
- siat/common.py +249 -1
- siat/fin_stmt2_yahoo.py +982 -0
- siat/financials2.py +41 -18
- siat/grafix.py +39 -2
- siat/stock.py +5 -5
- siat/translate.py +301 -1
- siat/valuation.py +12 -8
- {siat-3.7.7.dist-info → siat-3.7.9.dist-info}/METADATA +3 -1
- {siat-3.7.7.dist-info → siat-3.7.9.dist-info}/RECORD +13 -12
- {siat-3.7.7.dist-info → siat-3.7.9.dist-info}/LICENSE +0 -0
- {siat-3.7.7.dist-info → siat-3.7.9.dist-info}/WHEEL +0 -0
- {siat-3.7.7.dist-info → siat-3.7.9.dist-info}/top_level.txt +0 -0
siat/allin.py
CHANGED
siat/common.py
CHANGED
@@ -4171,6 +4171,77 @@ def firstLetterUpper(text):
|
|
4171
4171
|
|
4172
4172
|
return utext
|
4173
4173
|
|
4174
|
+
|
4175
|
+
#==============================================================================
|
4176
|
+
if __name__ == '__main__':
    # Ad-hoc check: look for an approximate occurrence of a misspelt phrase.
    long_text = "Hello, this is a test string."
    short_text = "test strng"

    similar_substring, similarity = find_similar_substring(long_text, short_text)

    # A falsy similarity (None) means nothing was found.
    message = (
        f"Similar substring found: {similar_substring}, Similarity: {similarity}"
        if similarity
        else "No similar substring found."
    )
    print(message)
|
4185
|
+
|
4186
|
+
|
4187
|
+
def find_similar_substring(long_string, short_string, threshold=0.7):
    """
    Search long_string for a substring that closely resembles short_string.

    Candidate substrings are examined from the shortest (len(short_string))
    up to the whole of long_string, scanning left to right within each
    length. The first candidate whose difflib.SequenceMatcher ratio against
    short_string is strictly greater than threshold is returned.

    Parameters:
        long_string: the text to search in.
        short_string: the text to look for approximately.
        threshold: similarity ratio a candidate must exceed (default 0.7).

    Returns:
        (substring, similarity) for the first qualifying candidate, or
        (None, None) when no candidate qualifies, when either input is
        empty, or when short_string is longer than long_string.
    """
    import difflib

    # Nothing meaningful can be matched against empty input (two empty
    # strings would otherwise score a perfect 1.0).
    if not long_string or not short_string:
        return None, None

    # SequenceMatcher caches its analysis of the second sequence, so pin
    # short_string there once and only swap the candidate in as sequence 1.
    matcher = difflib.SequenceMatcher(None, "", short_string)

    total = len(long_string)
    for size in range(len(short_string), total + 1):
        for start in range(total - size + 1):
            substring = long_string[start:start + size]

            # Score this candidate itself rather than the whole long_string.
            matcher.set_seq1(substring)
            similarity = matcher.ratio()

            if similarity > threshold:
                return substring, similarity

    # No candidate was similar enough.
    return None, None
|
4212
|
+
|
4213
|
+
|
4214
|
+
#==============================================================================
|
4215
|
+
if __name__ == '__main__':
    # Ad-hoc check with the classic edit-distance word pair.
    str1, str2 = "kitten", "sitting"

    string_similarity(str1, str2)
|
4220
|
+
|
4221
|
+
|
4222
|
+
def string_similarity(str1,str2,ignore_cases=True):
    """
    Compute the textual similarity of two strings.

    Parameters:
        str1, str2: the strings to compare.
        ignore_cases: when true (default) both strings are lower-cased
            before the comparison.

    Returns:
        The difflib.SequenceMatcher ratio of the two strings, a float
        between 0 and 1.
    """
    import difflib

    if ignore_cases:
        left, right = str1.lower(), str2.lower()
    else:
        left, right = str1, str2

    return difflib.SequenceMatcher(None, left, right).ratio()
|
4244
|
+
|
4174
4245
|
#==============================================================================
|
4175
4246
|
if __name__ == '__main__':
|
4176
4247
|
string = "HeLLo, Welcome to this New WorLd!"
|
@@ -4181,7 +4252,7 @@ if __name__ == '__main__':
|
|
4181
4252
|
def contains_any(string, words):
|
4182
4253
|
"""
|
4183
4254
|
|
4184
|
-
功能:测试字符串string中是否含有字符串列表words
|
4255
|
+
功能:测试字符串string中是否含有字符串列表words中的任意一个元素,忽略字母大小写
|
4185
4256
|
参数:
|
4186
4257
|
string:字符串,大小写不限
|
4187
4258
|
words:字符串列表,大小写不限
|
@@ -4208,6 +4279,183 @@ def contains_any(string, words):
|
|
4208
4279
|
#检查字符串new_string是否包含列表new_words_list中的任何元素
|
4209
4280
|
return any((word in new_string) for word in new_words_list)
|
4210
4281
|
|
4282
|
+
#==============================================================================
|
4283
|
+
if __name__ == '__main__':
    # Ad-hoc check of contains_all with mixed-case input.
    string = "HeLLo, Welcome to this New WorLd!"

    # Two alternative word lists; the second assignment is the one in effect.
    words = ["Hello", "World"]
    words = ["Hello", "World", "the"]

    contains_all(string, words)
|
4289
|
+
|
4290
|
+
def contains_all(string, words):
    """
    Test whether string contains every element of the list words.

    Parameters:
        string: the string to inspect, any letter case.
        words: list of strings to look for, any letter case.

    Returns:
        True when contains_any reports a hit for every word (also for an
        empty list), otherwise False. Checking stops at the first miss.

    Note: the check for each word is delegated to contains_any, whose
    docstring says it ignores letter case.
    NOTE(review): each word is handed to contains_any as a bare string
    although its docstring describes a list of strings - confirm that
    contains_any handles a single string as intended.
    """
    return all(contains_any(string, word) for word in words)
|
4307
|
+
|
4308
|
+
|
4309
|
+
#==============================================================================
|
4310
|
+
if __name__ == '__main__':
    # Ad-hoc check of list_contains_all on a set of statement item names.
    # Two alternative candidate lists; the second assignment is the one in effect.
    alist = [
        "CurrentDebt",
        "CurrentDebtAndCapitalLeaseObligation",
        "CurrentDeferredLiabilities",
        "CurrentLiabilities",
        "OtherCurrentBorrowings",
        "OtherCurrentLiabilities",
        "OtherNonCurrentLiabilities",
        "TotalNonCurrentLiabilitiesNetMinorityInterest",
    ]

    alist = [
        "CurrentDebtAndCapitalLeaseObligation",
        "CurrentDeferredLiabilities",
        "CurrentLiabilities",
        "OtherCurrentBorrowings",
        "OtherCurrentLiabilities",
        "OtherNonCurrentLiabilities",
        "TotalNonCurrentLiabilitiesNetMinorityInterest",
    ]

    # Two alternative keyword sets; the second assignment is the one in effect.
    item_words = ["Current", "Debt"]
    item_words = ["Current", "Liabilities"]

    # Assigned for interactive use; the call below relies on the default.
    perfect_match = True

    list_contains_all(alist, item_words)
|
4335
|
+
|
4336
|
+
def list_contains_all(alist, item_words,perfect_match=True):
    """
    Pick the element of alist that best matches the keywords in item_words.

    Parameters:
        alist: list of candidate strings, any letter case.
        item_words: list of keyword strings, any letter case.
        perfect_match: when true (default) only candidates for which
            contains_all(candidate, item_words) holds are scored; when
            false every candidate is scored.

    Returns:
        A tuple (element, similarity). element is the candidate with the
        highest string_similarity to the concatenated keywords, the first
        one winning a tie; similarity is its score. When no candidate
        scores above 0 the tuple is (False, 0).
    """
    DEBUG = False

    # Concatenate the keywords into one string to score candidates against.
    joined = ''.join(item_words)
    if DEBUG:
        print(f" DEBUG: item_words={item_words}, words={joined}")

    best_item = False
    best_score = 0
    for candidate in alist:
        if DEBUG:
            print(f" DEBUG: e={candidate}")

        if perfect_match and not contains_all(candidate, item_words):
            # Strict mode: a candidate missing a keyword scores 0 and cannot win.
            score = 0
        else:
            score = string_similarity(candidate, joined)

        if DEBUG:
            print(f" DEBUG: item_words={item_words}, e={candidate}, similarity={score}")

        # Strict comparison keeps the earliest candidate on equal scores.
        if score > best_score:
            best_item, best_score = candidate, score

    return best_item, best_score
|
4380
|
+
|
4381
|
+
if __name__ == '__main__':
    # Ad-hoc check of list_contains_all_list on a set of statement item names.
    # Two alternative candidate lists; the second assignment is the one in effect.
    alist = [
        "CurrentDebt",
        "CurrentDebtAndCapitalLeaseObligation",
        "CurrentDeferredLiabilities",
        "CurrentLiabilities",
        "OtherCurrentBorrowings",
        "OtherCurrentLiabilities",
        "OtherNonCurrentLiabilities",
        "TotalNonCurrentLiabilitiesNetMinorityInterest",
    ]

    alist = [
        "CurrentDebtAndCapitalLeaseObligation",
        "CurrentDeferredLiabilities",
        "CurrentLiabilities",
        "OtherCurrentBorrowings",
        "OtherCurrentLiabilities",
        "OtherNonCurrentLiabilities",
        "TotalNonCurrentLiabilitiesNetMinorityInterest",
    ]

    # Two alternative keyword groups; the second assignment is the one in effect.
    item_words_list = [["Current", "Debt"], ["Current", "Liabilities"]]
    item_words_list = [["Current", "Liabilibities"], ["Current", "Debt"]]

    list_contains_all_list(alist, item_words_list)
|
4404
|
+
|
4405
|
+
def list_contains_all_list(alist, item_words_list):
    """
    Pick the element of alist that best matches any of several keyword groups.

    Parameters:
        alist: list of candidate strings, any letter case.
        item_words_list: list of keyword lists, any letter case. The first
            group is the preferred one; the others are alternatives.

    Returns:
        The element with the highest similarity over all keyword groups,
        the first one found winning a tie, or False when no group yields
        a similarity above 0.

    Note: every group is evaluated through list_contains_all with
    perfect_match=False, so candidates are ranked by similarity alone.
    """
    DEBUG = False

    top_match = False
    top_score = 0

    for keyword_group in item_words_list:
        match, score = list_contains_all(alist, keyword_group, perfect_match=False)

        if DEBUG:
            print(" DEBUG: iwords={0}, alist={1}".format(keyword_group, alist))
            print('')
            print(f" DEBUG: result={match}, similarity={score:.2f}")

        # Strict comparison keeps the earlier group's match on equal scores.
        if score > top_score:
            top_match, top_score = match, score

    return top_match
|
4436
|
+
|
4437
|
+
|
4438
|
+
#==============================================================================
|
4439
|
+
if __name__ == '__main__':
    # Ad-hoc check: pause for a random number of seconds up to the limit.
    max_sleep = 30

    sleep_random(max_sleep)
|
4443
|
+
|
4444
|
+
def sleep_random(max_sleep=30):
    """
    Pause for a random whole number of seconds between 1 and max_sleep.

    Meant for use between consecutive fetches of the same kind of
    information, so the data source is less likely to block the caller's
    IP address.

    Parameters:
        max_sleep: upper bound of the pause in seconds, inclusive
            (default 30). A value below 1 disables the pause instead of
            raising ValueError from random.randint.

    Returns:
        None.
    """
    import random
    import time

    # randint(1, n) has an empty range for n < 1; treat that as "do not wait".
    if max_sleep < 1:
        return

    time.sleep(random.randint(1, max_sleep))

    return
|
4458
|
+
|
4211
4459
|
#==============================================================================
|
4212
4460
|
if __name__ == '__main__':
|
4213
4461
|
s = "Hello, 世界! This is a test string with symbols #$%^&*()."
|