PyPI - siat - Versions diffs - 3.7.7__py3-none-any.whl → 3.7.9__py3-none-any.whl - Mend

siat 3.7.7py3-none-any.whl → 3.7.9py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

siat/allin.py +3 -0
siat/common.py +249 -1
siat/fin_stmt2_yahoo.py +982 -0
siat/financials2.py +41 -18
siat/grafix.py +39 -2
siat/stock.py +5 -5
siat/translate.py +301 -1
siat/valuation.py +12 -8
{siat-3.7.7.dist-info → siat-3.7.9.dist-info}/METADATA +3 -1
{siat-3.7.7.dist-info → siat-3.7.9.dist-info}/RECORD +13 -12
{siat-3.7.7.dist-info → siat-3.7.9.dist-info}/LICENSE +0 -0
{siat-3.7.7.dist-info → siat-3.7.9.dist-info}/WHEEL +0 -0
{siat-3.7.7.dist-info → siat-3.7.9.dist-info}/top_level.txt +0 -0

siat/allin.py CHANGED Viewed

@@ -41,6 +41,9 @@ from siat.financials import *
 # 财务分析：雅虎源
 from siat.financials2 import *
+# 财务报表：雅虎源
+from siat.fin_stmt2_yahoo import *
 # 财务分析：中国
 from siat.financials_china import *

siat/common.py CHANGED Viewed

@@ -4171,6 +4171,77 @@ def firstLetterUpper(text):
     return utext
+#==============================================================================
+if __name__ == '__main__':
+    # Ad-hoc demo for find_similar_substring: search a sentence for a near
+    # match of a misspelled phrase and print what was found.
+    # NOTE(review): this guard sits above the def of find_similar_substring,
+    # so a top-to-bottom script run reaches the call before the name is bound
+    # (unless it is defined earlier in the file) - confirm intended usage.
+    long_text = "Hello, this is a test string."
+    short_text = "test strng"
+    similar_substring, similarity = find_similar_substring(long_text, short_text)
+    if similarity:
+        print(f"Similar substring found: {similar_substring}, Similarity: {similarity}")
+    else:
+        print("No similar substring found.")
+def find_similar_substring(long_string, short_string, threshold=0.7):
+    """
+    Find the first substring of long_string that closely resembles short_string.
+
+    Candidate windows are scanned from the shortest (len(short_string)) to the
+    longest (len(long_string)), left to right within each length. Each window
+    is scored against short_string with difflib.SequenceMatcher.ratio().
+
+    Parameters:
+    long_string: text to search in
+    short_string: text to look for
+    threshold: a window matches when its ratio is strictly greater than this
+
+    Returns:
+    (substring, similarity) for the first window above the threshold, or
+    (None, None) when no window qualifies or short_string is empty.
+    """
+    import difflib
+    # An empty pattern would trivially match an empty window with ratio 1.0.
+    if not short_string:
+        return None, None
+    # SequenceMatcher caches its analysis of the second sequence, so the
+    # pattern is fixed there once and only the first sequence changes per window.
+    matcher = difflib.SequenceMatcher(None, "", short_string)
+    total = len(long_string)
+    for size in range(len(short_string), total + 1):
+        for start in range(total - size + 1):
+            substring = long_string[start:start + size]
+            # Score this window itself; scoring the whole long_string instead
+            # would give every window the same ratio.
+            matcher.set_seq1(substring)
+            similarity = matcher.ratio()
+            if similarity > threshold:
+                return substring, similarity
+    # No window was similar enough.
+    return None, None
+#==============================================================================
+if __name__ == '__main__':
+    # Ad-hoc demo for string_similarity; the returned score is not stored or printed.
+    str1 = "kitten"
+    str2 = "sitting"
+    string_similarity(str1,str2)
+def string_similarity(str1,str2,ignore_cases=True):
+    """
+    Compute the text similarity of two strings.
+
+    Parameters:
+    str1: first string (first sequence given to the matcher)
+    str2: second string (second sequence given to the matcher)
+    ignore_cases: when True, both strings are lower-cased before comparing
+
+    Returns:
+    The difflib.SequenceMatcher ratio of the two strings, a float in [0, 1].
+    """
+    import difflib
+    # Optionally neutralise letter case on both sides.
+    if ignore_cases:
+        left, right = str1.lower(), str2.lower()
+    else:
+        left, right = str1, str2
+    # Argument order is kept as (str1, str2): the ratio is not guaranteed symmetric.
+    return difflib.SequenceMatcher(None, left, right).ratio()
 #==============================================================================
 if __name__ == '__main__':
     string = "HeLLo, Welcome to this New WorLd!"
@@ -4181,7 +4252,7 @@ if __name__ == '__main__':
 def contains_any(string, words):
     """
-    功能：测试字符串string中是否含有字符串列表words中的任意一个元素
+    功能：测试字符串string中是否含有字符串列表words中的任意一个元素，忽略字母大小写
     参数：
     string：字符串，大小写不限
     words：字符串列表，大小写不限
@@ -4208,6 +4279,183 @@ def contains_any(string, words):
     #检查字符串new_string是否包含列表new_words_list中的任何元素
     return any((word in new_string) for word in new_words_list)
+#==============================================================================
+if __name__ == '__main__':
+    # Ad-hoc demo for contains_all. The second assignment to words replaces
+    # the first, so only the three-word list is actually checked.
+    string = "HeLLo, Welcome to this New WorLd!"
+    words = ["Hello", "World"]
+    words = ["Hello", "World","the"]
+    contains_all(string, words)
+def contains_all(string, words):
+    """
+    Check whether string contains every element of words, ignoring letter case.
+
+    Parameters:
+    string: text to search in, any letter case
+    words: list of strings to look for, any letter case
+
+    Returns:
+    True when every word is found (also when words is empty), False otherwise.
+
+    Note: the case-insensitive comparison itself is delegated to contains_any.
+    """
+    # contains_any documents its second argument as a list of strings, so each
+    # word is wrapped in a one-element list instead of being passed as a bare
+    # str (iterating a bare str would test single characters, not the word).
+    return all(contains_any(string, [w]) for w in words)
+#==============================================================================
+if __name__ == '__main__':
+    # Ad-hoc demo for list_contains_all. alist and item_words are each
+    # assigned twice; only the later value of each reaches the call.
+    alist = ["CurrentDebt",
+            "CurrentDebtAndCapitalLeaseObligation",
+            "CurrentDeferredLiabilities",
+            "CurrentLiabilities",
+            "OtherCurrentBorrowings",
+            "OtherCurrentLiabilities",
+            "OtherNonCurrentLiabilities",
+            "TotalNonCurrentLiabilitiesNetMinorityInterest"]
+    alist = [
+            "CurrentDebtAndCapitalLeaseObligation",
+            "CurrentDeferredLiabilities",
+            "CurrentLiabilities",
+            "OtherCurrentBorrowings",
+            "OtherCurrentLiabilities",
+            "OtherNonCurrentLiabilities",
+            "TotalNonCurrentLiabilitiesNetMinorityInterest"]
+    item_words = ["Current", "Debt"]
+    item_words = ["Current", "Liabilities"]
+    # perfect_match is set here but not passed below; the call uses the default.
+    perfect_match=True
+    list_contains_all(alist, item_words)
+def list_contains_all(alist, item_words,perfect_match=True):
+    """
+    Pick the element of alist that best matches the words in item_words.
+
+    The words are concatenated into one string and every element of alist is
+    scored against it with string_similarity.
+
+    Parameters:
+    alist: list of candidate strings, any letter case
+    item_words: list of words, any letter case
+    perfect_match: when True, only elements for which contains_all(element,
+        item_words) holds are scored; all other elements count as similarity 0.
+        When False, every element is scored.
+
+    Returns:
+    A tuple (element, similarity) holding the highest-scoring element and its
+    score. On equal scores the earliest element wins. When no element scores
+    above 0 the tuple is (False, 0).
+    """
+    DEBUG=False
+    # Merge item_words into a single string so it can be compared for similarity.
+    words=''.join(item_words)
+    if DEBUG:
+        print(f"  DEBUG: item_words={item_words}, words={words}")
+    result,best_similarity=False,0
+    for e in alist:
+        if DEBUG:
+            print(f"  DEBUG: e={e}")
+        if perfect_match and not contains_all(e,item_words):
+            # Strict mode: an element missing any of the words is out of the running.
+            similarity=0
+        else:
+            similarity=string_similarity(e,words)
+        if DEBUG:
+            print(f"  DEBUG: item_words={item_words}, e={e}, similarity={similarity}")
+        # Strict '>' keeps the first element among equal scores.
+        if similarity > best_similarity:
+            result,best_similarity=e,similarity
+    return result,best_similarity
+if __name__ == '__main__':
+    # Ad-hoc demo for list_contains_all_list. alist and item_words_list are
+    # each assigned twice; only the later value of each reaches the call.
+    alist = ["CurrentDebt",
+            "CurrentDebtAndCapitalLeaseObligation",
+            "CurrentDeferredLiabilities",
+            "CurrentLiabilities",
+            "OtherCurrentBorrowings",
+            "OtherCurrentLiabilities",
+            "OtherNonCurrentLiabilities",
+            "TotalNonCurrentLiabilitiesNetMinorityInterest"]
+    alist = [
+            "CurrentDebtAndCapitalLeaseObligation",
+            "CurrentDeferredLiabilities",
+            "CurrentLiabilities",
+            "OtherCurrentBorrowings",
+            "OtherCurrentLiabilities",
+            "OtherNonCurrentLiabilities",
+            "TotalNonCurrentLiabilitiesNetMinorityInterest"]
+    item_words_list=[["Current","Debt"],["Current","Liabilities"]]
+    # NOTE(review): "Liabilibities" is misspelled - presumably on purpose, to
+    # exercise the fuzzy (non-exact) matching path; confirm.
+    item_words_list=[["Current","Liabilibities"],["Current","Debt"]]
+    list_contains_all_list(alist, item_words_list)
+def list_contains_all_list(alist, item_words_list):
+    """
+    Pick the element of alist that best matches any of several word lists.
+
+    Each word list in item_words_list is handed to list_contains_all with
+    perfect_match=False, so elements are ranked by similarity only and are not
+    required to contain every word.
+
+    Parameters:
+    alist: list of candidate strings, any letter case
+    item_words_list: list of word lists, any letter case; the first is the
+        preferred wording, the rest are alternatives
+
+    Returns:
+    The element with the highest similarity across all word lists. On equal
+    top scores the result from the earliest word list is kept. Returns False
+    when nothing scores above 0.
+    """
+    DEBUG=False
+    winner,top_score=False,0
+    for candidate_words in item_words_list:
+        match,score=list_contains_all(alist, candidate_words,perfect_match=False)
+        if DEBUG:
+            print("  DEBUG: iwords={0}, alist={1}".format(candidate_words,alist))
+            print('')
+            print(f"  DEBUG: result={match}, similarity={score:.2f}")
+        # Only a strictly better score replaces the current winner.
+        if not score > top_score:
+            continue
+        top_score,winner=score,match
+    return winner
+#==============================================================================
+if __name__ == '__main__':
+    # Ad-hoc demo for sleep_random: blocks for a random 1 to max_sleep seconds.
+    max_sleep=30
+    sleep_random(max_sleep)
+def sleep_random(max_sleep=30):
+    """
+    Pause for a random whole number of seconds between 1 and max_sleep.
+
+    Meant for spacing out consecutive fetches of the same kind of data, so the
+    data source is less likely to block the caller's IP address.
+
+    Parameters:
+    max_sleep: upper bound of the pause in seconds, default 30. Fractional
+        values are truncated to an integer. A bound below 1 means no pause.
+
+    Returns:
+    None
+    """
+    import random
+    import time
+    # random.randint needs integer bounds and a non-empty range.
+    upper=int(max_sleep)
+    if upper < 1:
+        return
+    time.sleep(random.randint(1,upper))
+    return
 #==============================================================================
 if __name__ == '__main__':
     s = "Hello, 世界! This is a test string with symbols #$%^&*()."

siat 3.7.7__py3-none-any.whl → 3.7.9__py3-none-any.whl

siat 3.7.7py3-none-any.whl → 3.7.9py3-none-any.whl