PyPI - dingo-python - Versions diffs - 1.2__py3-none-any.whl → 1.2.2__py3-none-any.whl - Mend

dingo-python 1.2-py3-none-any.whl → 1.2.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19)

dingo/data/converter/base.py +2 -2
dingo/data/dataset/base.py +17 -0
dingo/data/datasource/base.py +17 -0
dingo/data/utils/digit.py +17 -42
dingo/exec/local.py +9 -4
dingo/exec/spark.py +3 -1
dingo/io/input/InputArgs.py +10 -6
dingo/io/output/ResultInfo.py +11 -1
dingo/model/prompt/prompt_text_quality_v3.py +5 -4
dingo/model/rule/rule_common.py +106 -32
dingo/model/rule/utils/detect_lang.py +6 -0
dingo/model/rule/utils/util.py +12 -1
dingo/run/cli.py +4 -0
{dingo_python-1.2.dist-info → dingo_python-1.2.2.dist-info}/METADATA +3 -1
{dingo_python-1.2.dist-info → dingo_python-1.2.2.dist-info}/RECORD +19 -19
dingo/model/rule/utils/{xyz_head_word.py → multi_lan_util.py} +0 -0
{dingo_python-1.2.dist-info → dingo_python-1.2.2.dist-info}/LICENSE +0 -0
{dingo_python-1.2.dist-info → dingo_python-1.2.2.dist-info}/WHEEL +0 -0
{dingo_python-1.2.dist-info → dingo_python-1.2.2.dist-info}/top_level.txt +0 -0

dingo/data/converter/base.py CHANGED Viewed

@@ -66,7 +66,7 @@ class JsonConverter(BaseConverter):
                     'data_id': cls.find_levels_data(v, input_args.column_id) if input_args.column_id != '' else str(k),
                     'prompt': cls.find_levels_data(v, input_args.column_prompt) if input_args.column_prompt != '' else '',
                     'content': cls.find_levels_data(v, input_args.column_content) if input_args.column_content != '' else '',
-                    'raw_data': j
+                    'raw_data': v
                 })
         return _convert
@@ -91,7 +91,7 @@ class PlainConverter(BaseConverter):
                 'data_id': str(cls.data_id),
                 'prompt': '',
                 'content': raw,
-                'raw_data': {'data_id':str(cls.data_id), 'content': raw}
+                'raw_data': {'content': raw}
             })
             cls.data_id += 1
             return data

dingo/data/dataset/base.py CHANGED Viewed

@@ -1,3 +1,20 @@
+# This file is modified from:
+# https://github.com/mlflow/mlflow/blob/master/mlflow/data/dataset.py
+#
+# Copyright 2018 Databricks, Inc.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import json
 from functools import wraps
 from abc import abstractmethod

dingo/data/datasource/base.py CHANGED Viewed

@@ -1,3 +1,20 @@
+# This file is modified from:
+# https://github.com/mlflow/mlflow/blob/master/mlflow/data/dataset_source.py
+#
+# Copyright 2018 Databricks, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import json
 from functools import wraps
 from abc import abstractmethod

dingo/data/utils/digit.py CHANGED Viewed

@@ -1,3 +1,20 @@
+# This file is modified from:
+# https://github.com/mlflow/mlflow/blob/master/mlflow/data/digest_utils.py
+#
+# Copyright 2018 Databricks, Inc.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import logging
 from typing import Any, List
@@ -44,48 +61,6 @@ def compute_pandas_digest(df) -> str:
     )
-def compute_numpy_digest(features, targets=None) -> str:
-    """Computes a digest for the given numpy array.
-    Args:
-        features: A numpy array containing dataset features.
-        targets: A numpy array containing dataset targets. Optional.
-    Returns:
-        A string digest.
-    """
-    import numpy as np
-    import pandas as pd
-    hashable_elements = []
-    def hash_array(array):
-        flattened_array = array.flatten()
-        trimmed_array = flattened_array[0:MAX_ROWS]
-        try:
-            hashable_elements.append(pd.util.hash_array(trimmed_array))
-        except TypeError:
-            hashable_elements.append(np.int64(trimmed_array.size))
-        # hash full array dimensions
-        for x in array.shape:
-            hashable_elements.append(np.int64(x))
-    def hash_dict_of_arrays(array_dict):
-        for key in sorted(array_dict.keys()):
-            hash_array(array_dict[key])
-    for item in [features, targets]:
-        if item is None:
-            continue
-        if isinstance(item, dict):
-            hash_dict_of_arrays(item)
-        else:
-            hash_array(item)
-    return get_normalized_md5_digest(hashable_elements)
 def get_normalized_md5_digest(elements: List[Any]) -> str:
     """Computes a normalized digest for a list of hashable elements.

dingo/exec/local.py CHANGED Viewed

@@ -79,7 +79,6 @@ class LocalExecutor(Executor):
             if self.input_args.save_data:
                 self.save_data(output_path)
-            log.debug(self.summary)
         return [self.summary]
     def evaluate(self):
@@ -89,7 +88,6 @@ class LocalExecutor(Executor):
             group (Any): _description_
             group_type (str): _description_
         """
-        log.debug('[get_score]:' + self.input_args.input_path)
         with concurrent.futures.ThreadPoolExecutor(max_workers=self.input_args.max_workers) as executor:
             data_iter = self.load_data()
             def process_batch(batch: List):
@@ -178,6 +176,7 @@ class LocalExecutor(Executor):
                         self.summary.name_ratio[n] += 1
         self.summary.total += 1
+        log.info(f'[Data Number]: {str(self.summary.total)} ')
     def evaluate_rule(self, group: List[BaseRule], d: MetaData) -> ResultInfo:
         result_info = ResultInfo(data_id=d.data_id, prompt=d.prompt, content=d.content)
@@ -275,7 +274,10 @@ class LocalExecutor(Executor):
                     os.makedirs(p_t)
                 f_n = os.path.join(path, t, n) + ".jsonl"
                 with open(f_n, 'a', encoding='utf-8') as f:
-                    str_json = json.dumps(result_info.to_dict(), ensure_ascii=False)
+                    if self.input_args.save_raw:
+                        str_json = json.dumps(result_info.to_raw_dict(), ensure_ascii=False)
+                    else:
+                        str_json = json.dumps(result_info.to_dict(), ensure_ascii=False)
                     f.write(str_json + '\n')
         if self.input_args.save_correct:
             for result_info in self.good_info_list:
@@ -287,7 +289,10 @@ class LocalExecutor(Executor):
                         os.makedirs(p_t)
                     f_n = os.path.join(path, t, n) + ".jsonl"
                     with open(f_n, 'a', encoding='utf-8') as f:
-                        str_json = json.dumps(result_info.to_dict(), ensure_ascii=False)
+                        if self.input_args.save_raw:
+                            str_json = json.dumps(result_info.to_raw_dict(), ensure_ascii=False)
+                        else:
+                            str_json = json.dumps(result_info.to_dict(), ensure_ascii=False)
                         f.write(str_json + '\n')
         with open(path + '/summary.json', 'w', encoding='utf-8') as f:

dingo/exec/spark.py CHANGED Viewed

@@ -31,6 +31,7 @@ class SparkExecutor(Executor):
                  spark_conf: SparkConf = None):
         # eval param
         self.llm: Optional[BaseLLM] = None
+        self.group: Optional[Dict] = None
         self.summary: Optional[SummaryModel] = None
         self.bad_info_list: Optional[RDD] = None
         self.good_info_list: Optional[RDD] = None
@@ -72,6 +73,7 @@ class SparkExecutor(Executor):
     def execute(self) -> List[SummaryModel]:
         create_time = time.strftime('%Y%m%d_%H%M%S', time.localtime())
         Model.apply_config(self.input_args.custom_config, self.input_args.eval_group)
+        self.group = Model.get_group(self.input_args.eval_group)
         if GlobalConfig.config and GlobalConfig.config.llm_config:
             for llm_name in GlobalConfig.config.llm_config:
                 self.llm = Model.get_llm(llm_name)
@@ -148,7 +150,7 @@ class SparkExecutor(Executor):
         good_name_list = []
         bad_reason_list = []
         good_reason_list = []
-        for group_type, group in Model.get_group(self.input_args.eval_group).items():
+        for group_type, group in self.group.items():
             if group_type == 'rule':
                 r_i = self.evaluate_rule(group, data)
             elif group_type == 'prompt':

dingo/io/input/InputArgs.py CHANGED Viewed

@@ -1,8 +1,7 @@
 import os
 from typing import Optional
-from pydantic import BaseModel
+from pydantic import BaseModel, ValidationError
 class InputArgs(BaseModel):
     """
@@ -35,10 +34,15 @@ class InputArgs(BaseModel):
     custom_config: Optional[str | dict] = None
-    def __init__(self, **kwargs):
-        super().__init__(**kwargs)
+    class Config:
+        extra = 'forbid'  # Forbid extra parameters
-        self.check_args()
+    def __init__(self, **kwargs):
+        try:
+            super().__init__(**kwargs)
+            self.check_args()
+        except ValidationError as e:
+            raise ValueError(f"Invalid input parameters: {e}")
     def check_args(self):
         # check eval group
@@ -46,7 +50,7 @@ class InputArgs(BaseModel):
             raise ValueError("eval_group cannot be empty.")
         # check input path
-        if self.dataset != 'hugging_face' and  not os.path.exists(self.input_path):
+        if self.dataset != 'hugging_face' and not os.path.exists(self.input_path):
             raise FileNotFoundError(f"Input path '{self.input_path}' does not exist.")
         # check save_data/save_correct

dingo/io/output/ResultInfo.py CHANGED Viewed

@@ -23,4 +23,14 @@ class ResultInfo(BaseModel):
             'name_list': self.name_list,
             'reason_list': self.reason_list,
             'raw_data': self.raw_data
-        }
+        }
+    def to_raw_dict(self):
+        dingo_result = {
+            'error_status': self.error_status,
+            'type_list': self.type_list,
+            'name_list': self.name_list,
+            'reason_list': self.reason_list,
+        }
+        self.raw_data['dingo_result'] = dingo_result
+        return self.raw_data

dingo/model/prompt/prompt_text_quality_v3.py CHANGED Viewed

@@ -15,11 +15,12 @@ Your primary objective is to assess the suitability of this dataset for training
 1. Completeness
     1.1 Error_Formula_Table: If the text contains formulas or tables, then their format or content is incorrect.
     1.2 Error_List_Number: If the text contains list number, numbers in the list number have inconsistent formats and logical discontinuity.
-    1.3 Error_Section_Order: The order of text is cluttered, such as titles embedded in the main text or structures that are difficult to read.
+    1.3 Error_Line_Segment: The text contains sentences unreasonably divided into multiple lines by line breaks; Or the text contains segments stuck together due to lacking line breaks.
 2. Effectiveness
     2.1 Error_Garbled_Characters: The text contains a large amount of garbled and anti crawler characters.
     2.2 Error_Words_Stuck: The text contains a large number of words that are stuck together without being separated by spaces. Words with hyphens are considered normal and treat newline characters (\n) as spaces.
-    2.3 Error_Lack_Punctuation: Text contains a large number of words piled up, which cannot form a sentence when connected together.
+    2.3 Error_Lack_Punctuation: The text contains a large number of words piled up, which cannot form a sentence when connected together.
+    2.4 Error_Empty_Content: The text contains no other characters except for spaces, line breaks, carriage returns, and tabs.
 3. Similarity
     3.1 Error_Duplicate_Content: The text contains consecutive repeated text and multiple occurrences of characters.
 4. Security
@@ -31,8 +32,8 @@ Your primary objective is to assess the suitability of this dataset for training
     -If the text does not hit any negative criteria above, type must only be 'Good'; otherwise, type must only be one of the list ['Completeness', 'Effectiveness', 'Similarity', 'Security'].
 3. Assign a name to the text.
     -If type is 'Good', name must only be 'None'.
-    -If type is "Completeness", name must only be one of the list ["Error_Formula_Table", "Error_List_Number", "Error_Section_Order"]
-    -If type is "Effectiveness", name must only be one of the list ["Error_Garbled_Characters", "Error_Words_Stuck" or "Error_Lack_Punctuation"]
+    -If type is "Completeness", name must only be one of the list ["Error_Formula_Table", "Error_List_Number", "Error_Line_Segment"]
+    -If type is "Effectiveness", name must only be one of the list ["Error_Garbled_Characters", "Error_Words_Stuck", "Error_Lack_Punctuation" or "Error_Empty_Content"]
     -If type is "Similarity", name must only be one of the list ["Error_Duplicate_Content"]
     -If type is "Security", name must only be one of the list ["Error_Political_Content", "Error_Prohibited_Content"]
 4. Assign a score to the text according the type. If the type is "Good", score is 1, otherwise the score is 0.

dingo/model/rule/rule_common.py CHANGED Viewed

@@ -38,10 +38,12 @@ class RuleAlphaWords(BaseRule):
     def eval(cls, input_data: MetaData) -> ModelRes:
         from nltk.tokenize import word_tokenize
-        from dingo.model.rule.utils.detect_lang import decide_language_by_str
+        from dingo.model.rule.utils.detect_lang import decide_language_by_str, set_fasttext
         res = ModelRes()
         content = input_data.content
+        if cls.dynamic_config.refer_path is not None and len(cls.dynamic_config.refer_path) != 0:
+            set_fasttext(cls.dynamic_config.refer_path[0])
         language = decide_language_by_str(content)
         if language != 'en':
             return res
@@ -76,6 +78,8 @@ class RuleCapitalWords(BaseRule):
         content = input_data.content
         words = WordPunctTokenizer().tokenize(content)
         num_words = len(words)
+        if num_words == 0:
+            return res
         num_caps_words = sum(map(str.isupper, words))
         ratio = num_caps_words / num_words
         if ratio > cls.dynamic_config.threshold and num_words < 200:
@@ -149,7 +153,9 @@ class RuleColonEnd(BaseRule):
         return res
-@Model.rule_register('QUALITY_BAD_EFFECTIVENESS', ['default','sft','pretrain','benchmark','text_base_all','llm_base','multi_lan_ar','multi_lan_ko','multi_lan_ru','multi_lan_th','multi_lan_vi','multi_lan_cs','multi_lan_hu','multi_lan_sr','qa_standard_v1'])
+@Model.rule_register('QUALITY_BAD_EFFECTIVENESS', ['default','sft','pretrain','benchmark','text_base_all',
+                                                   'llm_base','multi_lan_ar','multi_lan_ko','multi_lan_ru','multi_lan_th','multi_lan_vi',
+                                                   'multi_lan_cs','multi_lan_hu','multi_lan_sr','qa_standard_v1','pdf'])
 class RuleContentNull(BaseRule):
     """check whether content is null"""
@@ -167,7 +173,7 @@ class RuleContentNull(BaseRule):
         return res
-@Model.rule_register('QUALITY_BAD_EFFECTIVENESS', ['text_base_all', 'qa_standard_v1'])
+@Model.rule_register('QUALITY_BAD_EFFECTIVENESS', ['text_base_all', 'qa_standard_v1','pdf'])
 class RuleContentShort(BaseRule):
     dynamic_config = DynamicRuleConfig(threshold = 20)
@@ -184,7 +190,8 @@ class RuleContentShort(BaseRule):
         return res
-@Model.rule_register('QUALITY_BAD_EFFECTIVENESS', ['multi_lan_ar','multi_lan_ko','multi_lan_ru','multi_lan_th','multi_lan_vi','multi_lan_cs','multi_lan_hu','multi_lan_sr'])
+@Model.rule_register('QUALITY_BAD_EFFECTIVENESS', ['multi_lan_ar','multi_lan_ko','multi_lan_ru','multi_lan_th',
+                                                   'multi_lan_vi','multi_lan_cs','multi_lan_hu','multi_lan_sr'])
 class RuleContentShortMultiLan(BaseRule):
     """check whether content is too short."""
@@ -216,9 +223,11 @@ class RuleCurlyBracket(BaseRule):
     def eval(cls, input_data: MetaData) -> ModelRes:
         res = ModelRes()
         content = input_data.content
+        if len(content) == 0:
+            return res
         num = content.count('{') + content.count('}')
-        ratio = num / len(content) if len(content) !=0 else 0
+        ratio = num / len(content)
         if ratio > cls.dynamic_config.threshold:
             res.error_status = True
             res.type = cls.metric_type
@@ -227,7 +236,9 @@ class RuleCurlyBracket(BaseRule):
         return res
-@Model.rule_register('QUALITY_BAD_SIMILARITY', ['default','sft','pretrain','benchmark','text_base_all','llm_base','multi_lan_ar','multi_lan_ko','multi_lan_ru','multi_lan_th','multi_lan_vi','multi_lan_cs','multi_lan_hu','multi_lan_sr'])
+@Model.rule_register('QUALITY_BAD_SIMILARITY', ['default','sft','pretrain','benchmark','text_base_all',
+                                                'llm_base','multi_lan_ar','multi_lan_ko','multi_lan_ru','multi_lan_th',
+                                                'multi_lan_vi','multi_lan_cs','multi_lan_hu','multi_lan_sr','pdf'])
 class RuleDocRepeat(BaseRule):
     """check whether content repeats"""
@@ -247,7 +258,9 @@ class RuleDocRepeat(BaseRule):
         return res
-@Model.rule_register('QUALITY_BAD_EFFECTIVENESS', ['text_base_all','llm_base','multi_lan_ar','multi_lan_ko','multi_lan_ru','multi_lan_th','multi_lan_vi','multi_lan_cs','multi_lan_hu','multi_lan_sr', 'qa_standard_v1'])
+@Model.rule_register('QUALITY_BAD_EFFECTIVENESS', ['text_base_all','llm_base','multi_lan_ar','multi_lan_ko',
+                                                   'multi_lan_ru','multi_lan_th','multi_lan_vi','multi_lan_cs','multi_lan_hu',
+                                                   'multi_lan_sr', 'qa_standard_v1','pdf'])
 class RuleEnterMore(BaseRule):
     """check whether content has 8 consecutive carriage returns."""
@@ -269,7 +282,9 @@ class RuleEnterMore(BaseRule):
         return res
-@Model.rule_register('QUALITY_BAD_EFFECTIVENESS', ['text_base_all','llm_base','multi_lan_ar','multi_lan_ko','multi_lan_ru','multi_lan_th','multi_lan_vi','multi_lan_cs','multi_lan_hu','multi_lan_sr', 'qa_standard_v1'])
+@Model.rule_register('QUALITY_BAD_EFFECTIVENESS', ['text_base_all','llm_base','multi_lan_ar','multi_lan_ko',
+                                                   'multi_lan_ru','multi_lan_th','multi_lan_vi','multi_lan_cs','multi_lan_hu',
+                                                   'multi_lan_sr', 'qa_standard_v1','pdf'])
 class RuleEnterRatioMore(BaseRule):
     """check whether the number of enter / the number of content > 25%"""
@@ -299,7 +314,7 @@ class RuleHeadWordAr(BaseRule):
     @classmethod
     def eval(cls, input_data: MetaData) -> ModelRes:
-        from dingo.model.rule.utils.xyz_head_word import get_xyz_head_word
+        from dingo.model.rule.utils.multi_lan_util import get_xyz_head_word
         res = ModelRes()
         keyword = get_xyz_head_word("ar")
@@ -321,7 +336,7 @@ class RuleHeadWordCs(BaseRule):
     @classmethod
     def eval(cls, input_data: MetaData) -> ModelRes:
-        from dingo.model.rule.utils.xyz_head_word import get_xyz_head_word
+        from dingo.model.rule.utils.multi_lan_util import get_xyz_head_word
         res = ModelRes()
         keyword = get_xyz_head_word("cs")
@@ -343,7 +358,7 @@ class RuleHeadWordHu(BaseRule):
     @classmethod
     def eval(cls, input_data: MetaData) -> ModelRes:
-        from dingo.model.rule.utils.xyz_head_word import get_xyz_head_word
+        from dingo.model.rule.utils.multi_lan_util import get_xyz_head_word
         res = ModelRes()
         keyword = get_xyz_head_word("hu")
@@ -354,7 +369,7 @@ class RuleHeadWordHu(BaseRule):
             res.type = cls.metric_type
             res.name = cls.__name__
             res.reason = ['Content has irrelevance tail source info.']
-            return res
+        return res
 @Model.rule_register('QUALITY_BAD_RELEVANCE', ['multi_lan_ko'])
@@ -365,7 +380,7 @@ class RuleHeadWordKo(BaseRule):
     @classmethod
     def eval(cls, input_data: MetaData) -> ModelRes:
-        from dingo.model.rule.utils.xyz_head_word import get_xyz_head_word
+        from dingo.model.rule.utils.multi_lan_util import get_xyz_head_word
         res = ModelRes()
         keyword = get_xyz_head_word("ko")
@@ -387,7 +402,7 @@ class RuleHeadWordRu(BaseRule):
     @classmethod
     def eval(cls, input_data: MetaData) -> ModelRes:
-        from dingo.model.rule.utils.xyz_head_word import get_xyz_head_word
+        from dingo.model.rule.utils.multi_lan_util import get_xyz_head_word
         res = ModelRes()
         keyword = get_xyz_head_word("ru")
@@ -409,7 +424,7 @@ class RuleHeadWordSr(BaseRule):
     @classmethod
     def eval(cls, input_data: MetaData) -> ModelRes:
-        from dingo.model.rule.utils.xyz_head_word import get_xyz_head_word
+        from dingo.model.rule.utils.multi_lan_util import get_xyz_head_word
         res = ModelRes()
         keyword = get_xyz_head_word("sr")
@@ -431,7 +446,7 @@ class RuleHeadWordTh(BaseRule):
     @classmethod
     def eval(cls, input_data: MetaData) -> ModelRes:
-        from dingo.model.rule.utils.xyz_head_word import get_xyz_head_word
+        from dingo.model.rule.utils.multi_lan_util import get_xyz_head_word
         res = ModelRes()
         keyword = get_xyz_head_word("th")
@@ -453,7 +468,7 @@ class RuleHeadWordVi(BaseRule):
     @classmethod
     def eval(cls, input_data: MetaData) -> ModelRes:
-        from dingo.model.rule.utils.xyz_head_word import get_xyz_head_word
+        from dingo.model.rule.utils.multi_lan_util import get_xyz_head_word
         res = ModelRes()
         keyword = get_xyz_head_word("vi")
@@ -467,7 +482,9 @@ class RuleHeadWordVi(BaseRule):
         return res
-@Model.rule_register('QUALITY_BAD_EFFECTIVENESS', ['default','sft','pretrain','benchmark','text_base_all','multi_lan_ar','multi_lan_ko','multi_lan_ru','multi_lan_th','multi_lan_vi','multi_lan_cs','multi_lan_hu','multi_lan_sr','qa_standard_v1'])
+@Model.rule_register('QUALITY_BAD_EFFECTIVENESS', ['default','sft','pretrain','benchmark','text_base_all',
+                                                   'multi_lan_ar','multi_lan_ko','multi_lan_ru','multi_lan_th','multi_lan_vi',
+                                                   'multi_lan_cs','multi_lan_hu','multi_lan_sr','qa_standard_v1','pdf'])
 class RuleHtmlEntity(BaseRule):
     """check whether content has html entity"""
@@ -491,6 +508,8 @@ class RuleHtmlEntity(BaseRule):
     def eval(cls, input_data: MetaData) -> ModelRes:
         res = ModelRes()
         content = input_data.content
+        if len(content) == 0:
+            return res
         entities = cls.dynamic_config.key_list
         full_entities_1 = [f"&{entity}；" for entity in entities]
@@ -520,7 +539,9 @@ class RuleHtmlEntity(BaseRule):
         return res
-@Model.rule_register('QUALITY_BAD_EFFECTIVENESS', ['text_base_all','multi_lan_ar','multi_lan_ko','multi_lan_ru','multi_lan_th','multi_lan_vi','multi_lan_cs','multi_lan_hu','multi_lan_sr','qa_standard_v1'])
+@Model.rule_register('QUALITY_BAD_EFFECTIVENESS', ['text_base_all','multi_lan_ar','multi_lan_ko','multi_lan_ru',
+                                                   'multi_lan_th','multi_lan_vi','multi_lan_cs','multi_lan_hu','multi_lan_sr',
+                                                   'qa_standard_v1','pdf'])
 class RuleHtmlTag(BaseRule):
     """check whether content has image links or html tags."""
@@ -530,6 +551,8 @@ class RuleHtmlTag(BaseRule):
     def eval(cls, input_data: MetaData) -> ModelRes:
         res = ModelRes()
         content = input_data.content
+        if len(content) == 0:
+            return res
         matches = re.findall('|'.join(cls.dynamic_config.key_list), content)
         num = len(matches)
@@ -563,7 +586,9 @@ class RuleIDCard(BaseRule):
         return res
-@Model.rule_register('QUALITY_BAD_EFFECTIVENESS', ['text_base_all','multi_lan_ar','multi_lan_ko','multi_lan_ru','multi_lan_th','multi_lan_vi','multi_lan_cs','multi_lan_hu','multi_lan_sr', 'qa_standard_v1'])
+@Model.rule_register('QUALITY_BAD_EFFECTIVENESS', ['text_base_all','multi_lan_ar','multi_lan_ko','multi_lan_ru',
+                                                   'multi_lan_th','multi_lan_vi','multi_lan_cs','multi_lan_hu','multi_lan_sr',
+                                                   'qa_standard_v1'])
 class RuleInvisibleChar(BaseRule):
     """check whether content has invisible chars."""
@@ -573,6 +598,8 @@ class RuleInvisibleChar(BaseRule):
     def eval(cls, input_data: MetaData) -> ModelRes:
         res = ModelRes()
         content = input_data.content
+        if len(content) == 0:
+            return res
         matches = re.findall(cls.dynamic_config.pattern, content)
         num = len(matches)
@@ -791,7 +818,9 @@ class RuleMeanWordLength(BaseRule):
         return res
-@Model.rule_register('QUALITY_BAD_FLUENCY', ['default','sft','pretrain','benchmark','text_base_all','llm_base','multi_lan_ar','multi_lan_ko','multi_lan_ru','multi_lan_th','multi_lan_vi','multi_lan_cs','multi_lan_hu','multi_lan_sr'])
+@Model.rule_register('QUALITY_BAD_FLUENCY', ['default','sft','pretrain','benchmark','text_base_all',
+                                             'llm_base','multi_lan_ar','multi_lan_ko','multi_lan_ru','multi_lan_th',
+                                             'multi_lan_vi','multi_lan_cs','multi_lan_hu','multi_lan_sr'])
 class RuleNoPunc(BaseRule):
     """check whether paragraph has no punctuation."""
@@ -799,10 +828,12 @@ class RuleNoPunc(BaseRule):
     @classmethod
     def eval(cls, input_data: MetaData) -> ModelRes:
-        from dingo.model.rule.utils.detect_lang import decide_language_by_str
+        from dingo.model.rule.utils.detect_lang import decide_language_by_str, set_fasttext
         res = ModelRes()
         content = input_data.content
+        if cls.dynamic_config.refer_path is not None and len(cls.dynamic_config.refer_path) != 0:
+            set_fasttext(cls.dynamic_config.refer_path[0])
         language = decide_language_by_str(content)
         if language != 'en':
             return res
@@ -867,7 +898,9 @@ class RuleSentenceNumber(BaseRule):
         return res
-@Model.rule_register('QUALITY_BAD_EFFECTIVENESS', ['text_base_all','llm_base','multi_lan_ar','multi_lan_ko','multi_lan_ru','multi_lan_th','multi_lan_vi','multi_lan_cs','multi_lan_hu','multi_lan_sr','qa_standard_v1'])
+@Model.rule_register('QUALITY_BAD_EFFECTIVENESS', ['text_base_all','llm_base','multi_lan_ar','multi_lan_ko',
+                                                   'multi_lan_ru','multi_lan_th','multi_lan_vi','multi_lan_cs','multi_lan_hu',
+                                                   'multi_lan_sr','qa_standard_v1','pdf'])
 class RuleSpaceMore(BaseRule):
     """check whether content has 500 spaces."""
@@ -887,7 +920,10 @@ class RuleSpaceMore(BaseRule):
         return res
-@Model.rule_register('QUALITY_BAD_EFFECTIVENESS', ['default','sft','pretrain','benchmark','text_base_all','llm_base','multi_lan_ar','multi_lan_ko','multi_lan_ru','multi_lan_th','multi_lan_vi','multi_lan_cs','multi_lan_hu','multi_lan_sr','qa_standard_v1'])
+@Model.rule_register('QUALITY_BAD_EFFECTIVENESS', ['default','sft','pretrain','benchmark','text_base_all',
+                                                   'llm_base','multi_lan_ar','multi_lan_ko','multi_lan_ru','multi_lan_th',
+                                                   'multi_lan_vi','multi_lan_cs','multi_lan_hu','multi_lan_sr','qa_standard_v1',
+                                                   'pdf'])
 class RuleSpecialCharacter(BaseRule):
     """check whether content has special characters. """
@@ -897,7 +933,8 @@ class RuleSpecialCharacter(BaseRule):
             # r"(\\\\;){3,}|(\{\}){3,}|(&nbsp;){3,}",
             r"&#247;|\? :",
             r"[�□]|\{\/U\}",
-            r"U\+26[0-F][0-D]|U\+273[3-4]|U\+1F[3-6][0-4][0-F]|U\+1F6[8-F][0-F]"
+            r"U\+26[0-F][0-D]|U\+273[3-4]|U\+1F[3-6][0-4][0-F]|U\+1F6[8-F][0-F]",
+            r"<\|.*?\|>"
         ]
     )
@@ -905,6 +942,8 @@ class RuleSpecialCharacter(BaseRule):
     def eval(cls, input_data: MetaData) -> ModelRes:
         res = ModelRes()
         content = input_data.content
+        if len(content) == 0:
+            return res
         matches = []
         num = 0
@@ -930,11 +969,13 @@ class RuleStopWord(BaseRule):
     def eval(cls, input_data: MetaData) -> ModelRes:
         from nltk.tokenize import WordPunctTokenizer
-        from dingo.model.rule.utils.detect_lang import decide_language_by_str
+        from dingo.model.rule.utils.detect_lang import decide_language_by_str, set_fasttext
         from dingo.model.rule.utils.util import get_stop_words
         res = ModelRes()
         raw_content = input_data.content
+        if cls.dynamic_config.refer_path is not None and len(cls.dynamic_config.refer_path) != 0:
+            set_fasttext(cls.dynamic_config.refer_path[0])
         language = decide_language_by_str(raw_content)
         if language != 'en':
             return res
@@ -1018,7 +1059,32 @@ class RuleUniqueWords(BaseRule):
         return res
-@Model.rule_register('QUALITY_BAD_EFFECTIVENESS', ['text_base_all','llm_base','multi_lan_ar','multi_lan_ko','multi_lan_ru','multi_lan_th','multi_lan_vi','multi_lan_cs','multi_lan_hu','multi_lan_sr','qa_standard_v1'])
+@Model.rule_register("QUALITY_BAD_SECURITY", [])
+class RuleUnsafeWords(BaseRule):
+    """check whether content contains unsafe words."""
+    dynamic_config = DynamicRuleConfig(refer_path=[])
+    @classmethod
+    def eval(cls, input_data: MetaData) -> ModelRes:
+        from dingo.model.rule.utils.util import get_unsafe_words
+        res = ModelRes()
+        content = input_data.content
+        if cls.dynamic_config.key_list is None:
+            cls.dynamic_config.key_list = get_unsafe_words(cls.dynamic_config.refer_path)
+        matches = list(filter(lambda x:x in content, cls.dynamic_config.key_list))
+        if matches:
+            res.error_status = True
+            res.type = cls.metric_type
+            res.name = cls.__name__
+            res.reason = matches
+        return res
+@Model.rule_register('QUALITY_BAD_EFFECTIVENESS', ['text_base_all','llm_base','multi_lan_ar','multi_lan_ko',
+                                                   'multi_lan_ru','multi_lan_th','multi_lan_vi','multi_lan_cs','multi_lan_hu',
+                                                   'multi_lan_sr','qa_standard_v1','pdf'])
 class RuleOnlyUrl(BaseRule):
     """check whether content is only an url link."""
@@ -1027,8 +1093,12 @@ class RuleOnlyUrl(BaseRule):
     @classmethod
     def eval(cls, input_data: MetaData) -> ModelRes:
         res = ModelRes()
+        content = input_data.content
+        if len(content.strip()) == 0:
+            return res
         SEARCH_REGEX = re.compile(cls.dynamic_config.pattern)
-        content_without_url = SEARCH_REGEX.sub("", input_data.content)
+        content_without_url = SEARCH_REGEX.sub("", content)
+        print(content_without_url)
         if len(content_without_url.strip()) == 0:
             res.error_status = True
             res.type = cls.metric_type
@@ -1098,7 +1168,9 @@ class RuleWordSplit(BaseRule):
         return res
-@Model.rule_register('QUALITY_BAD_FLUENCY', ['text_base_all','llm_base','multi_lan_ar','multi_lan_ko','multi_lan_ru','multi_lan_th','multi_lan_vi','multi_lan_cs','multi_lan_hu','multi_lan_sr'])
+@Model.rule_register('QUALITY_BAD_FLUENCY', ['text_base_all','llm_base','multi_lan_ar','multi_lan_ko',
+                                             'multi_lan_ru','multi_lan_th','multi_lan_vi','multi_lan_cs','multi_lan_hu',
+                                             'multi_lan_sr'])
 class RuleWordStuck(BaseRule):
     """check whether words are stuck."""
@@ -1116,11 +1188,13 @@ class RuleWordStuck(BaseRule):
     def eval(cls, input_data: MetaData) -> ModelRes:
         import wordninja
-        from dingo.model.rule.utils.detect_lang import decide_language_by_str
+        from dingo.model.rule.utils.detect_lang import decide_language_by_str, set_fasttext
         from dingo.model.rule.utils.util import is_sha256
         res = ModelRes()
         content = input_data.content
+        if cls.dynamic_config.refer_path is not None and len(cls.dynamic_config.refer_path) != 0:
+            set_fasttext(cls.dynamic_config.refer_path[0])
         language = decide_language_by_str(content)
         if language != 'en':
             return res
@@ -1148,7 +1222,7 @@ if __name__ == '__main__':
     data = MetaData(
         data_id = '',
         prompt = '',
-        content = " �FA OR FICTION? WH CA IT DO?{{{{{{{{{{{ "
+        content = "  \n  \n"
     )
-    tmp = RuleSpecialCharacter().eval(data)
+    tmp = RuleOnlyUrl().eval(data)
     print(tmp)

dingo/model/rule/utils/detect_lang.py CHANGED Viewed

@@ -6,9 +6,15 @@ from huggingface_hub import hf_hub_download
 from dingo.utils import log
 _global_lang_detect = []
+_fasttext_path = ''
+def set_fasttext(path: str):
+    global _fasttext_path
+    _fasttext_path = path
 def download_fasttext() -> str:
+    if _fasttext_path:
+        return _fasttext_path
     file_path = hf_hub_download(repo_id='chupei/fasttext.lib.176.bin', filename='lid.176.bin')
     return file_path

dingo/model/rule/utils/util.py CHANGED Viewed

@@ -1,3 +1,4 @@
+import json
 import re
 import os
 import sys
@@ -6,7 +7,7 @@ import string
 import unicodedata
 import zhon.hanzi
-from typing import Set, Tuple, Callable
+from typing import Set, Tuple, Callable, List
 from collections import Counter
 from zhon.hanzi import punctuation
@@ -61,6 +62,16 @@ class TextSlice:
         self.end = end
+def get_unsafe_words(file_path_list: List[str]) -> List:
+    unsafe_words_list = []
+    for file_path in file_path_list:
+        with open(file_path, 'r', encoding='utf-8') as f:
+            for line in f:
+                j = json.loads(line)
+                word = str(j['word'])
+                unsafe_words_list.append(word)
+    return unsafe_words_list
 def split_paragraphs(
         text: str, normalizer: Callable[[str], str], remove_empty: bool = True
 ) -> Tuple[TextSlice]:

dingo/run/cli.py CHANGED Viewed

@@ -25,6 +25,8 @@ def parse_args():
                         default=None, help="Save data in output path")
     parser.add_argument("--save_correct", type=bool,
                         default=None, help="Save correct data in output path")
+    parser.add_argument("--save_raw", type=bool,
+                        default=None, help="Save raw data in output path")
     parser.add_argument("--data_format", type=str,
                         default=None, choices=['json', 'jsonl', 'listjson', 'plaintext', 'image', 's3_image'],
                         help="Dataset format (in ['json', 'jsonl', 'listjson', 'plaintext', 'image', 's3_image']), default is 'json'")
@@ -100,6 +102,8 @@ if __name__ == '__main__':
             input_data['save_data'] = args.save_data
         if args.save_correct:
             input_data['save_correct'] = args.save_correct
+        if args.save_raw:
+            input_data['save_raw'] = args.save_raw
         if args.data_format:
             input_data['data_format'] = args.data_format
         if args.dataset:

{dingo_python-1.2.dist-info → dingo_python-1.2.2.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: dingo-python
-Version: 1.2
+Version: 1.2.2
 Summary: Language quality evaluation tool.
 Home-page: https://github.com/shijinpjlab/Dingo/main
 Author: SH AI Lab
@@ -16,10 +16,12 @@ Requires-Dist: chardet
 Requires-Dist: datasets
 Requires-Dist: fasttext-wheel ==0.9.2
 Requires-Dist: hanziconv
+Requires-Dist: httpx ==0.27.2
 Requires-Dist: huggingface-hub
 Requires-Dist: jieba
 Requires-Dist: jsonlines
 Requires-Dist: langid
+Requires-Dist: openai ==1.56.2
 Requires-Dist: opencv-python
 Requires-Dist: packaging
 Requires-Dist: pandas

{dingo_python-1.2.dist-info → dingo_python-1.2.2.dist-info}/RECORD RENAMED Viewed

@@ -3,30 +3,30 @@ dingo/config/__init__.py,sha256=8qPvpZTKUBcZqAqu2S5b2P-GRQGMV6VwGYl8bvQDLI0,45
 dingo/config/config.py,sha256=EstF5mWkLrA24Eg1SbOir1fVTCN_-9n3w4rz54RjJcA,1883
 dingo/data/__init__.py,sha256=X7ZLiJN8vDpzRufwpJ9E36phqbW9gEpMT68TxzWLDt4,172
 dingo/data/converter/__init__.py,sha256=1MiG4H8Sg2sYHQmYdg0F9_1okP_YoMNHyQorPEAf6zw,91
-dingo/data/converter/base.py,sha256=Y81bQtc56hKeziLoB5IIdvKJtqHNs2XNzvC2IWM7QbE,6705
+dingo/data/converter/base.py,sha256=hvXPtYtHR97_LeywgKrEwy7Trr6Yx-qL1WCCi4_R_zs,6677
 dingo/data/converter/img_utils.py,sha256=Pjy4Db3bETAuRmkVO5GzUxTE_hNJhnYyQEJXd_nHaXk,3516
 dingo/data/dataset/__init__.py,sha256=AdBLdr3j4NN-wGvQOuPi_jmzkMcggJApdQ24spLN3-U,405
-dingo/data/dataset/base.py,sha256=o_gqMquedxl2zWxanCcq25XiqDhtvsjGX0CqSALoMGo,4813
+dingo/data/dataset/base.py,sha256=0mr2qXhfu6hhJ0Dz2nk7S_ZQd4k7PfMfxEprRzkJNnE,5518
 dingo/data/dataset/huggingface.py,sha256=kHtfXKSptxv3hQRGv6WNFMyN3m0nr7CECPU8ESLXGfQ,7181
 dingo/data/dataset/local.py,sha256=6HSfF4vGANh1KcxNyjohOqKrnqPzjjC11gHFZK5aITQ,2654
 dingo/data/dataset/spark.py,sha256=lBubZM7lJrPHO6hcnuD39eEtDB7nuLjWNbQi6jQCItI,4119
 dingo/data/datasource/__init__.py,sha256=nr7dX7c2ylLBJVU9gnAcZzqMTYMRTbhopVAO_dOs0Dw,427
-dingo/data/datasource/base.py,sha256=_Fy92I3LntR0H8Zc-eX6fe3GFhM7de1Lz1--pyOkuQo,2220
+dingo/data/datasource/base.py,sha256=T9y8uxMegHgbb6o7aPbmeLIr5xSAOOl7k5Fpab6jZNc,2931
 dingo/data/datasource/huggingface.py,sha256=-0JCr8f1cOAmWIqZnO8E10QEJ4tiWxSftoUOE6woZI4,3744
 dingo/data/datasource/local.py,sha256=GEa3-P5FTdeS-SWRyLRCew9WXfNl8E9I6AUXSU4eJlg,2672
 dingo/data/datasource/s3.py,sha256=5u8TZN67qVjJD3QQSGEeSmldBHY0HeEvm0s3HB3W0BU,2778
 dingo/data/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-dingo/data/utils/digit.py,sha256=NyfCdjo6lGro0jILSORrEZLSxn06FhwZ9Nn6o_mKISI,2882
+dingo/data/utils/digit.py,sha256=V_Cy8o0t0JdBHOJZmi0A6nSczSfi2AbdE23fcWbTN_s,2415
 dingo/data/utils/insecure_hash.py,sha256=1FnevDyjeOrtsBQVlckJDEbk6mItMvfj07_Ut7oBioo,447
 dingo/exec/__init__.py,sha256=5faQMKKWAx4OFxBNN5FOnJuqjf-iXBEDWGOYxnVogV4,341
 dingo/exec/base.py,sha256=upeqzXVX7IiFWfcx3XFdaVmPmZIKMJ0-EtgCRt_Ws-w,1311
-dingo/exec/local.py,sha256=H4UJrzdHTnFp50p981rN0_Btqqdb0gfUa0sUXFtGS10,12303
-dingo/exec/spark.py,sha256=j5af-tdbciTRUQwaC1U2jNb9JntbYZ8WQ1NBqPPwHsQ,11426
+dingo/exec/local.py,sha256=_l3e5mIEWt8YNYAho8cKWw26yUeGj7jODchIc_bJKTA,12623
+dingo/exec/spark.py,sha256=7M-pG78Ugp1Shy20_cTe3-eIl7sTWCQ72KiNE3xoPww,11500
 dingo/io/__init__.py,sha256=XxTZKh8nVsoYjfPriaTvW7Or7lNM_11SjJ8uC-T3kws,196
-dingo/io/input/InputArgs.py,sha256=7yh5XVLXrrAJeDEzbgiSE4vq-7BxfFuYa2Dk4E_0nvM,2223
+dingo/io/input/InputArgs.py,sha256=v2O8gpR8Jni861eV5eU9Am8d4RgypJuYbPDiKdeOCdQ,2429
 dingo/io/input/MetaData.py,sha256=BhJtPA-tTpN7-RhZF42eHO7e4VY4Bl88fevXABKKXbc,272
 dingo/io/input/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-dingo/io/output/ResultInfo.py,sha256=rpy-ImKV5HfAd65IETrAw4W0X8oSSU2BTqfjBtfVoK0,676
+dingo/io/output/ResultInfo.py,sha256=T0BPigEk9hU7wj5UdjAHFxBfPFg0YecaCPMHVxjVd08,995
 dingo/io/output/SummaryModel.py,sha256=sl05AaeT4yTMQrjp4EVETKmEIa5nOgO0ReGKt-x0wXQ,1008
 dingo/io/output/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dingo/model/__init__.py,sha256=CULKDg2nazgoRvg8j2Ue8GBzZnTXwztX-t0REyAs8SQ,56
@@ -57,25 +57,25 @@ dingo/model/prompt/prompt_image.py,sha256=Y35RwTeWxXeEmdEMEz5BWSpxWi5sPUk-iQrPqX
 dingo/model/prompt/prompt_text_language.py,sha256=5NNM2rXJk5tJxJXfALjGtxwV7H97et81Xr26xqVK7_M,2870
 dingo/model/prompt/prompt_text_quality_multilan.py,sha256=_xDdz5ytNvZmP8DM7S4c329usPnCi76ftlNMezmCb94,2173
 dingo/model/prompt/prompt_text_quality_v2.py,sha256=F4W-SmFvRiCKRkN4PEb3vVUecUuUHWknQ_K1eN83krY,3565
-dingo/model/prompt/prompt_text_quality_v3.py,sha256=CcEchxdCYzs1gHd8Aj0tFJVugF9__KldzK7XVubFLV0,3325
+dingo/model/prompt/prompt_text_quality_v3.py,sha256=Nkxr5Jz6rpYBfFtSMlSj8zNIO_StM4kFkWKuEWJ8u7M,3520
 dingo/model/rule/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dingo/model/rule/base.py,sha256=9kxxq59oaCadZiIOxZsnhrUcSJgeesNB-iXepdSp0h4,427
-dingo/model/rule/rule_common.py,sha256=I5Z9eivaIq4H6Z9iNlf2EWrCIwASpGrTK5rmAbaVqng,42375
+dingo/model/rule/rule_common.py,sha256=wnVN3Pncg9Mb4DQ9qk--A3cI0AbkvTvrh9iDoaTPuFQ,45633
 dingo/model/rule/rule_image.py,sha256=0vclF5CXUMk25Gs3uWc0YyP91kOtROns7M_fA6wswl4,5766
 dingo/model/rule/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-dingo/model/rule/utils/detect_lang.py,sha256=IcUgchI2cKZis6qqSj8YOwggTm75Mh2JGEhXxEdNUUo,5190
+dingo/model/rule/utils/detect_lang.py,sha256=wuCxR_JuTTG0Jj2A9nqN581h9NL11W92bzdvDEeQ46M,5344
 dingo/model/rule/utils/image_util.py,sha256=YovsL-uLNNpUsY6iLVFwvO6kwNpPXtAm8aoHAWboafw,187
-dingo/model/rule/utils/util.py,sha256=SUuho9NmUU6gIaDOgroGXCFAhZFejFEH4u7SUI-LYEg,13473
-dingo/model/rule/utils/xyz_head_word.py,sha256=D2sgNyRQL8JOuD807_kah0NR59PI9mioK3nZBMpoT54,2710
+dingo/model/rule/utils/multi_lan_util.py,sha256=D2sgNyRQL8JOuD807_kah0NR59PI9mioK3nZBMpoT54,2710
+dingo/model/rule/utils/util.py,sha256=iwCBUcoKhUUZUnVz-jAhoAQT6j5jcYoNfN9XI2v0CMc,13849
 dingo/run/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-dingo/run/cli.py,sha256=fL1Vo8nd4UAPtDvzZi4exPorbz1Fyx0Wvej2dFvEyE0,6486
+dingo/run/cli.py,sha256=_Ly3AAQm2xsJ4eOFvxoABhUDYXQKSdiY7sTtRIuD_HU,6687
 dingo/run/vsl.py,sha256=ygmlVdKH99mo2JfVDfMpv2UItjkn6S-eoPRosrxOPM4,7341
 dingo/run/web.py,sha256=Dyl97ur92ecmyf-8JgttdvEEXviWqLtm8iJxtVuauWI,1599
 dingo/utils/__init__.py,sha256=masgEgU90tbPMKtZz5NF1oraNMrx1xLpHQ9B8QMPm9o,37
 dingo/utils/log_util/__init__.py,sha256=B4SurbYC7MqlI9ILM2_gS4QPLYj_UbyPRQQSpcGccdI,721
 dingo/utils/log_util/logger.py,sha256=jliGVit4mHB17nBeXOqbLHrlEWwuZJsNu_xBDmxr42I,1424
-dingo_python-1.2.dist-info/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
-dingo_python-1.2.dist-info/METADATA,sha256=BbTFYYiWeigLydAhycy-RugD67BksnttG_v4Gt7TsO4,9964
-dingo_python-1.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-dingo_python-1.2.dist-info/top_level.txt,sha256=gSXQSLowu_WOQRi75wK3qyjbHxeN5PqsaA4ChGmJdek,6
-dingo_python-1.2.dist-info/RECORD,,
+dingo_python-1.2.2.dist-info/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
+dingo_python-1.2.2.dist-info/METADATA,sha256=cBUwU1lcfSBtUsbotDm0ha_H7Jf7hF8lwlqzWPPTsdo,10027
+dingo_python-1.2.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+dingo_python-1.2.2.dist-info/top_level.txt,sha256=gSXQSLowu_WOQRi75wK3qyjbHxeN5PqsaA4ChGmJdek,6
+dingo_python-1.2.2.dist-info/RECORD,,

/dingo/model/rule/utils/{xyz_head_word.py → multi_lan_util.py} RENAMED Viewed

File without changes

{dingo_python-1.2.dist-info → dingo_python-1.2.2.dist-info}/LICENSE RENAMED Viewed

File without changes

{dingo_python-1.2.dist-info → dingo_python-1.2.2.dist-info}/WHEEL RENAMED Viewed

File without changes

{dingo_python-1.2.dist-info → dingo_python-1.2.2.dist-info}/top_level.txt RENAMED Viewed

File without changes

dingo-python 1.2__py3-none-any.whl → 1.2.2__py3-none-any.whl

dingo-python 1.2__py3-none-any.whl → 1.2.2__py3-none-any.whl