siat 3.7.7__py3-none-any.whl → 3.7.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
siat/allin.py CHANGED
@@ -41,6 +41,9 @@ from siat.financials import *
41
41
  # 财务分析:雅虎源
42
42
  from siat.financials2 import *
43
43
 
44
+ # 财务报表:雅虎源
45
+ from siat.fin_stmt2_yahoo import *
46
+
44
47
  # 财务分析:中国
45
48
  from siat.financials_china import *
46
49
 
siat/common.py CHANGED
@@ -4171,6 +4171,77 @@ def firstLetterUpper(text):
4171
4171
 
4172
4172
  return utext
4173
4173
 
4174
+
4175
+ #==============================================================================
4176
+ if __name__ == '__main__':
4177
+ long_text = "Hello, this is a test string."
4178
+ short_text = "test strng"
4179
+
4180
+ similar_substring, similarity = find_similar_substring(long_text, short_text)
4181
+ if similarity:
4182
+ print(f"Similar substring found: {similar_substring}, Similarity: {similarity}")
4183
+ else:
4184
+ print("No similar substring found.")
4185
+
4186
+
4187
def find_similar_substring(long_string, short_string, threshold=0.7):
    """
    Look for a substring of long_string that closely resembles short_string.

    Candidate windows are scanned from the shortest (len(short_string)) up to
    the whole of long_string, left to right within each size. The first window
    whose similarity is strictly greater than threshold is returned.

    Parameters:
        long_string: text to search in.
        short_string: text to look for approximately.
        threshold: minimum difflib ratio (exclusive) for a window to count as
            similar; default 0.7.

    Returns:
        (substring, similarity) for the first qualifying window, or
        (None, None) when no window qualifies, when short_string is empty, or
        when short_string is longer than long_string.
    """
    import difflib

    # An empty needle would trivially match the empty window with ratio 1.0.
    if not short_string:
        return None, None

    # SequenceMatcher caches its analysis of the second sequence, so keep
    # short_string fixed there and swap only the first sequence per window.
    matcher = difflib.SequenceMatcher(None, "", short_string)

    total = len(long_string)
    for size in range(len(short_string), total + 1):
        for start in range(total - size + 1):
            substring = long_string[start:start + size]

            # Score this specific window, not the whole long_string.
            matcher.set_seq1(substring)
            similarity = matcher.ratio()

            if similarity > threshold:
                return substring, similarity

    # No window was similar enough.
    return None, None
4212
+
4213
+
4214
+ #==============================================================================
4215
+ if __name__ == '__main__':
4216
+ str1 = "kitten"
4217
+ str2 = "sitting"
4218
+
4219
+ string_similarity(str1,str2)
4220
+
4221
+
4222
def string_similarity(str1,str2,ignore_cases=True):
    """
    Compute the textual similarity of two strings.

    Parameters:
        str1, str2: the strings to compare.
        ignore_cases: when True (default) both strings are lower-cased before
            the comparison.

    Returns:
        The difflib.SequenceMatcher ratio of the two strings, a float between
        0 and 1.
    """
    import difflib

    # Pick the operands in one step; lower-casing is the only preprocessing.
    left, right = (str1.lower(), str2.lower()) if ignore_cases else (str1, str2)

    return difflib.SequenceMatcher(None, left, right).ratio()
4244
+
4174
4245
  #==============================================================================
4175
4246
  if __name__ == '__main__':
4176
4247
  string = "HeLLo, Welcome to this New WorLd!"
@@ -4181,7 +4252,7 @@ if __name__ == '__main__':
4181
4252
  def contains_any(string, words):
4182
4253
  """
4183
4254
 
4184
- 功能:测试字符串string中是否含有字符串列表words中的任意一个元素
4255
+ 功能:测试字符串string中是否含有字符串列表words中的任意一个元素,忽略字母大小写
4185
4256
  参数:
4186
4257
  string:字符串,大小写不限
4187
4258
  words:字符串列表,大小写不限
@@ -4208,6 +4279,183 @@ def contains_any(string, words):
4208
4279
  #检查字符串new_string是否包含列表new_words_list中的任何元素
4209
4280
  return any((word in new_string) for word in new_words_list)
4210
4281
 
4282
+ #==============================================================================
4283
+ if __name__ == '__main__':
4284
+ string = "HeLLo, Welcome to this New WorLd!"
4285
+ words = ["Hello", "World"]
4286
+ words = ["Hello", "World","the"]
4287
+
4288
+ contains_all(string, words)
4289
+
4290
def contains_all(string, words):
    """
    Test whether string contains every element of words, ignoring letter case.

    Parameters:
        string: the text to search in; any letter case.
        words: list of strings that must all occur in string; any letter case.

    Returns:
        True if every word occurs in string as a substring (also True for an
        empty words list), otherwise False.

    Note:
        Both sides are lower-cased before comparing. Each word is matched as a
        whole substring; handing a single word to a helper that iterates its
        argument would compare character by character instead.
    """
    # Lower-case the haystack once instead of once per word.
    lowered = string.lower()

    return all(w.lower() in lowered for w in words)
4307
+
4308
+
4309
+ #==============================================================================
4310
+ if __name__ == '__main__':
4311
+ alist = ["CurrentDebt",
4312
+ "CurrentDebtAndCapitalLeaseObligation",
4313
+ "CurrentDeferredLiabilities",
4314
+ "CurrentLiabilities",
4315
+ "OtherCurrentBorrowings",
4316
+ "OtherCurrentLiabilities",
4317
+ "OtherNonCurrentLiabilities",
4318
+ "TotalNonCurrentLiabilitiesNetMinorityInterest"]
4319
+
4320
+ alist = [
4321
+ "CurrentDebtAndCapitalLeaseObligation",
4322
+ "CurrentDeferredLiabilities",
4323
+ "CurrentLiabilities",
4324
+ "OtherCurrentBorrowings",
4325
+ "OtherCurrentLiabilities",
4326
+ "OtherNonCurrentLiabilities",
4327
+ "TotalNonCurrentLiabilitiesNetMinorityInterest"]
4328
+
4329
+ item_words = ["Current", "Debt"]
4330
+ item_words = ["Current", "Liabilities"]
4331
+
4332
+ perfect_match=True
4333
+
4334
+ list_contains_all(alist, item_words)
4335
+
4336
def list_contains_all(alist, item_words,perfect_match=True):
    """
    Pick the element of alist that best matches the words in item_words.

    Parameters:
        alist: list of candidate strings; any letter case.
        item_words: list of words; any letter case. The words are concatenated
            into one string that each candidate is scored against.
        perfect_match: when True (default) a candidate is scored only if
            contains_all(candidate, item_words) holds; when False every
            candidate is scored.

    Returns:
        A tuple (result, best_similarity). result is the candidate with the
        highest string_similarity score, the first one on ties, or False when
        no candidate scored above 0. best_similarity is that score, or 0.
    """
    DEBUG=False

    # Merge item_words into a single string so a similarity can be computed.
    words=''.join(item_words)
    if DEBUG:
        print(f" DEBUG: item_words={item_words}, words={words}")

    best_item=False
    best_score=0
    for candidate in alist:
        if DEBUG:
            print(f" DEBUG: e={candidate}")

        # In perfect-match mode a candidate missing any word keeps score 0.
        if perfect_match and not contains_all(candidate,item_words):
            score=0
        else:
            score=string_similarity(candidate,words)

        if DEBUG:
            print(f" DEBUG: item_words={item_words}, e={candidate}, similarity={score}")

        # Strict comparison keeps the earliest candidate on equal scores.
        if score > best_score:
            best_item,best_score=candidate,score

    return best_item,best_score
4380
+
4381
+ if __name__ == '__main__':
4382
+ alist = ["CurrentDebt",
4383
+ "CurrentDebtAndCapitalLeaseObligation",
4384
+ "CurrentDeferredLiabilities",
4385
+ "CurrentLiabilities",
4386
+ "OtherCurrentBorrowings",
4387
+ "OtherCurrentLiabilities",
4388
+ "OtherNonCurrentLiabilities",
4389
+ "TotalNonCurrentLiabilitiesNetMinorityInterest"]
4390
+
4391
+ alist = [
4392
+ "CurrentDebtAndCapitalLeaseObligation",
4393
+ "CurrentDeferredLiabilities",
4394
+ "CurrentLiabilities",
4395
+ "OtherCurrentBorrowings",
4396
+ "OtherCurrentLiabilities",
4397
+ "OtherNonCurrentLiabilities",
4398
+ "TotalNonCurrentLiabilitiesNetMinorityInterest"]
4399
+
4400
+ item_words_list=[["Current","Debt"],["Current","Liabilities"]]
4401
+ item_words_list=[["Current","Liabilibities"],["Current","Debt"]]
4402
+
4403
+ list_contains_all_list(alist, item_words_list)
4404
+
4405
def list_contains_all_list(alist, item_words_list):
    """
    Pick the element of alist that best matches any word group in
    item_words_list.

    Parameters:
        alist: list of candidate strings; any letter case.
        item_words_list: list of word lists; any letter case. Each word list
            is passed to list_contains_all with perfect_match=False.

    Returns:
        The candidate with the highest similarity across all word groups. On
        equal scores the earliest word group wins. Returns False when no
        group produced a score above 0.
    """
    DEBUG=False

    winner=False
    top_score=0

    for word_group in item_words_list:
        match,score=list_contains_all(alist, word_group,perfect_match=False)
        if DEBUG:
            print(" DEBUG: iwords={0}, alist={1}".format(word_group,alist))
            print('')
            print(f" DEBUG: result={match}, similarity={score:.2f}")

        # Strict comparison: a later group must beat the current best score.
        if score > top_score:
            top_score,winner=score,match

    return winner
4436
+
4437
+
4438
+ #==============================================================================
4439
+ if __name__ == '__main__':
4440
+ max_sleep=30
4441
+
4442
+ sleep_random(max_sleep)
4443
+
4444
def sleep_random(max_sleep=30):
    """
    Suspend execution for a random whole number of seconds.

    Intended for use between consecutive fetches of the same kind of
    information, to reduce the risk of the data source blocking the IP
    address.

    Parameters:
        max_sleep: upper bound of the pause in seconds, default 30. The pause
            is a random integer from 1 to max_sleep inclusive. A value below 1
            means no pause at all.

    Returns:
        None.
    """
    import random
    import time

    upper = int(max_sleep)

    # random.randint(1, upper) raises ValueError on an empty range, so a
    # non-positive bound is treated as "do not sleep".
    if upper < 1:
        return

    time.sleep(random.randint(1, upper))

    return
4458
+
4211
4459
  #==============================================================================
4212
4460
  if __name__ == '__main__':
4213
4461
  s = "Hello, 世界! This is a test string with symbols #$%^&*()."