fosslight-util 1.4.47__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,46 +5,41 @@
5
5
 
6
6
  import logging
7
7
  import os
8
- from fosslight_util.constant import LOGGER_NAME, FL_DEPENDENCY, FL_BINARY
8
+ from fosslight_util.constant import LOGGER_NAME, FOSSLIGHT_SCANNER
9
+ from fosslight_util.cover import CoverItem
10
+ from typing import List, Dict
9
11
 
10
12
  _logger = logging.getLogger(LOGGER_NAME)
11
13
 
12
14
 
13
15
  class OssItem:
14
- def __init__(self, value):
15
- self._name = ""
16
- self._version = ""
16
+
17
+ def __init__(self, name="", version="", license="", dl_url=""):
18
+ self.name = name
19
+ self.version = version
17
20
  self._license = []
18
- self._copyright = ""
21
+ self.license = license
22
+ self.download_location = dl_url
23
+ self.exclude = False
19
24
  self.comment = ""
20
- self._exclude = False
21
25
  self.homepage = ""
22
- self.relative_path = value
23
- self._source_name_or_path = []
24
- self.download_location = ""
25
- self._yocto_recipe = []
26
- self._yocto_package = []
27
- self.is_binary = False
28
- self._depends_on = []
29
- self.purl = ""
30
- self.bin_vulnerability = ""
31
- self.bin_tlsh = ""
32
- self.bin_sha1 = ""
26
+ self._copyright = ""
33
27
 
34
28
  def __del__(self):
35
29
  pass
36
30
 
37
31
  @property
38
- def copyright(self):
39
- return self._copyright
32
+ def license(self):
33
+ return self._license
40
34
 
41
- @copyright.setter
42
- def copyright(self, value):
35
+ @license.setter
36
+ def license(self, value):
43
37
  if value != "":
44
- if isinstance(value, list):
45
- value = "\n".join(value)
46
- value = value.strip()
47
- self._copyright = value
38
+ if not isinstance(value, list):
39
+ value = value.split(",")
40
+ self._license.extend(value)
41
+ self._license = [item.strip() for item in self._license]
42
+ self._license = list(set(self._license))
48
43
 
49
44
  @property
50
45
  def exclude(self):
@@ -58,13 +53,16 @@ class OssItem:
58
53
  self._exclude = False
59
54
 
60
55
  @property
61
- def name(self):
62
- return self._name
56
+ def copyright(self):
57
+ return self._copyright
63
58
 
64
- @name.setter
65
- def name(self, value):
59
+ @copyright.setter
60
+ def copyright(self, value):
66
61
  if value != "":
67
- self._name = value
62
+ if isinstance(value, list):
63
+ value = "\n".join(value)
64
+ value = value.strip()
65
+ self._copyright = value
68
66
 
69
67
  @property
70
68
  def version(self):
@@ -78,149 +76,144 @@ class OssItem:
78
76
  self._version = ""
79
77
 
80
78
  @property
81
- def license(self):
82
- return self._license
83
-
84
- @license.setter
85
- def license(self, value):
86
- if not isinstance(value, list):
87
- value = value.split(",")
88
- self._license.extend(value)
89
- self._license = [item.strip() for item in self._license]
90
- self._license = list(set(self._license))
91
-
92
- @property
93
- def source_name_or_path(self):
94
- return self._source_name_or_path
79
+ def comment(self):
80
+ return self._comment
95
81
 
96
- @source_name_or_path.setter
97
- def source_name_or_path(self, value):
82
+ @comment.setter
83
+ def comment(self, value):
98
84
  if not value:
99
- self._source_name_or_path = []
85
+ self._comment = ""
100
86
  else:
101
- if not isinstance(value, list):
102
- value = value.split(",")
103
- self._source_name_or_path.extend(value)
104
- self._source_name_or_path = [item.strip() for item in self._source_name_or_path]
105
- self._source_name_or_path = list(set(self._source_name_or_path))
87
+ if self._comment:
88
+ self._comment = f"{self._comment} / {value}"
89
+ else:
90
+ self._comment = value
106
91
 
107
- @property
108
- def yocto_recipe(self):
109
- return self._yocto_recipe
110
92
 
111
- @yocto_recipe.setter
112
- def yocto_recipe(self, value):
113
- if not isinstance(value, list):
114
- value = value.split(",")
115
- self._yocto_recipe.extend(value)
116
- self._yocto_recipe = [item.strip() for item in self._yocto_recipe]
117
- self._yocto_recipe = list(set(self._yocto_recipe))
93
+ class FileItem:
94
+ def __init__(self, value):
95
+ self.relative_path = value
96
+ self.source_name_or_path = ""
97
+ self._exclude = False
98
+ self._comment = ""
99
+ self.is_binary = False
100
+ self.oss_items: List[OssItem] = []
101
+
102
+ def __del__(self):
103
+ pass
118
104
 
119
105
  @property
120
- def yocto_package(self):
121
- return self._yocto_package
106
+ def exclude(self):
107
+ return self._exclude
122
108
 
123
- @yocto_package.setter
124
- def yocto_package(self, value):
125
- if not isinstance(value, list):
126
- value = value.split(",")
127
- self._yocto_package.extend(value)
128
- self._yocto_package = [item.strip() for item in self._yocto_package]
129
- self._yocto_package = list(set(self._yocto_package))
109
+ @exclude.setter
110
+ def exclude(self, value):
111
+ if value:
112
+ self._exclude = True
113
+ else:
114
+ self._exclude = False
115
+ for oss in self.oss_items:
116
+ oss.exclude = value
130
117
 
131
118
  @property
132
- def depends_on(self):
133
- return self._depends_on
119
+ def comment(self):
120
+ return self._comment
134
121
 
135
- @depends_on.setter
136
- def depends_on(self, value):
122
+ @comment.setter
123
+ def comment(self, value):
137
124
  if not value:
138
- self._depends_on = []
139
- else:
140
- if not isinstance(value, list):
141
- value = value.split(",")
142
- self._depends_on.extend(value)
143
- self._depends_on = [item.strip() for item in self._depends_on]
144
- self._depends_on = list(set(self._depends_on))
145
-
146
- def set_sheet_item(self, item, scanner_name=''):
147
- if len(item) < 9:
148
- _logger.warning(f"sheet list is too short ({len(item)}): {item}")
149
- return
150
- if scanner_name == FL_DEPENDENCY:
151
- self.purl = item[0]
125
+ self._comment = ""
152
126
  else:
153
- self.source_name_or_path = item[0]
154
- self.name = item[1]
155
- self.version = item[2]
156
- self.license = item[3]
157
- self.download_location = item[4]
158
- self.homepage = item[5]
159
- self.copyright = item[6]
160
- self.exclude = item[7]
161
- self.comment = item[8]
162
-
163
- if len(item) >= 10 and scanner_name == FL_DEPENDENCY:
164
- self.depends_on = item[9]
165
- if len(item) >= 10 and scanner_name == FL_BINARY:
166
- self.bin_vulnerability = item[9]
167
- if len(item) >= 12:
168
- self.bin_tlsh = item[10]
169
- self.bin_sha1 = item[11]
170
-
171
- def get_print_array(self, scanner_name=''):
127
+ if self._comment:
128
+ self._comment = f"{self._comment} / {value}"
129
+ else:
130
+ self._comment = value
131
+ for oss in self.oss_items:
132
+ oss.comment = value
133
+
134
+ def get_print_array(self):
172
135
  items = []
173
- if scanner_name != FL_DEPENDENCY:
174
- if len(self.source_name_or_path) == 0:
175
- self.source_name_or_path.append("")
176
- if len(self.license) == 0:
177
- self.license.append("")
178
-
179
- exclude = "Exclude" if self.exclude else ""
180
- lic = ",".join(self.license)
181
- if scanner_name == FL_DEPENDENCY:
182
- items = [self.purl, self.name, self.version, lic,
183
- self.download_location, self.homepage, self.copyright, exclude, self.comment]
184
- if len(self.depends_on) > 0:
185
- items.append(",".join(self.depends_on))
186
- else:
187
- for source_name_or_path in self.source_name_or_path:
188
- if scanner_name == FL_BINARY:
189
- oss_item = [os.path.join(self.relative_path, source_name_or_path), self.name, self.version, lic,
190
- self.download_location, self.homepage, self.copyright, exclude, self.comment,
191
- self.bin_vulnerability, self.bin_tlsh, self.bin_sha1]
192
- else:
193
- oss_item = [os.path.join(self.relative_path, source_name_or_path), self.name, self.version, lic,
194
- self.download_location, self.homepage, self.copyright, exclude, self.comment]
195
- items.append(oss_item)
136
+
137
+ for oss in self.oss_items:
138
+ exclude = "Exclude" if self.exclude or oss.exclude else ""
139
+ lic = ",".join(oss.license)
140
+
141
+ oss_item = [os.path.join(self.relative_path, self.source_name_or_path), oss.name, oss.version, lic,
142
+ oss.download_location, oss.homepage, oss.copyright, exclude, oss.comment]
143
+ items.append(oss_item)
196
144
  return items
197
145
 
198
146
  def get_print_json(self):
199
- json_item = {}
200
- json_item["name"] = self.name
201
-
202
- json_item["version"] = self.version
203
- if len(self.source_name_or_path) > 0:
204
- json_item["source path"] = self.source_name_or_path
205
- if len(self.license) > 0:
206
- json_item["license"] = self.license
207
- if self.download_location != "":
208
- json_item["download location"] = self.download_location
209
- if self.homepage != "":
210
- json_item["homepage"] = self.homepage
211
- if self.copyright != "":
212
- json_item["copyright text"] = self.copyright
213
- if self.exclude:
214
- json_item["exclude"] = self.exclude
215
- if self.comment != "":
216
- json_item["comment"] = self.comment
217
- if len(self.depends_on) > 0:
218
- json_item["depends on"] = self.depends_on
219
- if self.purl != "":
220
- json_item["package url"] = self.purl
221
-
222
- return json_item
147
+ items = []
148
+
149
+ for oss in self.oss_items:
150
+ json_item = {}
151
+ json_item["name"] = oss.name
152
+ json_item["version"] = oss.version
153
+
154
+ if self.source_name_or_path != "":
155
+ json_item["source path"] = self.source_name_or_path
156
+ if len(oss.license) > 0:
157
+ json_item["license"] = oss.license
158
+ if oss.download_location != "":
159
+ json_item["download location"] = oss.download_location
160
+ if oss.homepage != "":
161
+ json_item["homepage"] = oss.homepage
162
+ if oss.copyright != "":
163
+ json_item["copyright text"] = oss.copyright
164
+ if self.exclude or oss.exclude:
165
+ json_item["exclude"] = True
166
+ if oss.comment != "":
167
+ json_item["comment"] = oss.comment
168
+ items.append(json_item)
169
+ return items
223
170
 
224
171
 
225
172
  def invalid(cmd):
226
173
  _logger.info('[{}] is invalid'.format(cmd))
174
+
175
+
176
class ScannerItem:
    """Container for one scanner run: cover information plus per-package file items.

    Attributes:
        cover: ``CoverItem`` created for ``pkg_name`` and ``start_time``.
        file_items: Maps a package (scanner) name to its list of ``FileItem``.
            Starts with a single empty entry for ``pkg_name`` unless
            ``pkg_name`` equals ``FOSSLIGHT_SCANNER``, in which case it starts empty.
        external_sheets: Maps a sheet name to its rows (lists of strings).
    """

    def __init__(self, pkg_name, start_time=""):
        self.cover = CoverItem(tool_name=pkg_name, start_time=start_time)
        self.file_items: "Dict[str, List[FileItem]]" = {} if pkg_name == FOSSLIGHT_SCANNER else {pkg_name: []}
        self.external_sheets: "Dict[str, List[List[str]]]" = {}

    def set_cover_pathinfo(self, input_dir, path_to_exclude):
        """Store the input path and the excluded paths (joined with ``", "``) on the cover."""
        self.cover.input_path = input_dir
        self.cover.exclude_path = ", ".join(path_to_exclude)

    def set_cover_comment(self, value):
        """Append ``value`` to the cover comment, separated by ``" / "``.

        Falsy values are ignored.
        """
        if not value:
            return
        if self.cover.comment:
            self.cover.comment = f"{self.cover.comment} / {value}"
        else:
            self.cover.comment = value

    def get_cover_comment(self):
        """Return the cover comment split on ``" / "`` with each part stripped."""
        return [item.strip() for item in self.cover.comment.split(" / ")]

    def append_file_items(self, file_item: "List[FileItem]", pkg_name=""):
        """Append file items to the list kept for ``pkg_name``.

        Args:
            file_item: File items to add.
            pkg_name: Target package name. When empty, the only registered
                package name is used.

        If ``pkg_name`` is empty and there is not exactly one registered
        package, an error is logged and nothing is appended.
        """
        if pkg_name == "":
            if len(self.file_items) != 1:
                _logger.error("Package name is not set. Cannot append file_item into ScannerItem.")
                # Without a resolvable package the items must not be stored
                # under an empty key, so stop here.
                return
            pkg_name = next(iter(self.file_items))
        self.file_items.setdefault(pkg_name, []).extend(file_item)

    def get_print_array(self, scanner_name):
        """Return the concatenated ``get_print_array()`` rows of every file item of ``scanner_name``.

        Raises:
            KeyError: If ``scanner_name`` has no entry in ``file_items``.
        """
        items = []
        for file_item in self.file_items[scanner_name]:
            items.extend(file_item.get_print_array())
        return items

    def get_print_json(self, scanner_name):
        """Return the concatenated ``get_print_json()`` entries of every file item of ``scanner_name``.

        Raises:
            KeyError: If ``scanner_name`` has no entry in ``file_items``.
        """
        items = []
        for file_item in self.file_items[scanner_name]:
            items.extend(file_item.get_print_json())
        return items

    def __del__(self):
        pass
@@ -6,6 +6,7 @@ import os
6
6
  from fosslight_util.write_excel import write_result_to_excel, write_result_to_csv
7
7
  from fosslight_util.write_opossum import write_opossum
8
8
  from fosslight_util.write_yaml import write_yaml
9
+ from typing import Tuple
9
10
 
10
11
  SUPPORT_FORMAT = {'excel': '.xlsx', 'csv': '.csv', 'opossum': '.json', 'yaml': '.yaml'}
11
12
 
@@ -105,7 +106,8 @@ def check_output_formats(output='', formats=[], customized_format={}):
105
106
  return success, msg, output_path, output_files, output_extensions
106
107
 
107
108
 
108
- def write_output_file(output_file_without_ext, file_extension, sheet_list, extended_header={}, hide_header={}, cover=""):
109
+ def write_output_file(output_file_without_ext: str, file_extension: str, scan_item, extended_header: dict = {},
110
+ hide_header: dict = {}) -> Tuple[bool, str, str]:
109
111
  success = True
110
112
  msg = ''
111
113
 
@@ -114,13 +116,13 @@ def write_output_file(output_file_without_ext, file_extension, sheet_list, exten
114
116
  result_file = output_file_without_ext + file_extension
115
117
 
116
118
  if file_extension == '.xlsx':
117
- success, msg = write_result_to_excel(result_file, sheet_list, extended_header, hide_header, cover)
119
+ success, msg = write_result_to_excel(result_file, scan_item, extended_header, hide_header)
118
120
  elif file_extension == '.csv':
119
- success, msg, result_file = write_result_to_csv(result_file, sheet_list)
121
+ success, msg, result_file = write_result_to_csv(result_file, scan_item, False, extended_header)
120
122
  elif file_extension == '.json':
121
- success, msg = write_opossum(result_file, sheet_list)
123
+ success, msg = write_opossum(result_file, scan_item)
122
124
  elif file_extension == '.yaml':
123
- success, msg, result_file = write_yaml(result_file, sheet_list, False)
125
+ success, msg, result_file = write_yaml(result_file, scan_item, False)
124
126
  else:
125
127
  success = False
126
128
  msg = f'Not supported file extension({file_extension})'
@@ -8,8 +8,8 @@ import codecs
8
8
  import os
9
9
  import re
10
10
  import sys
11
- from .constant import LOGGER_NAME
12
- from .oss_item import OssItem
11
+ from fosslight_util.constant import LOGGER_NAME
12
+ from fosslight_util.oss_item import OssItem, FileItem
13
13
 
14
14
  _logger = logging.getLogger(LOGGER_NAME)
15
15
  SUPPORT_OSS_INFO_FILES = [r"oss-pkg-info[\s\S]*.ya?ml", r"sbom(-|_)info[\s\S]*.ya?ml"]
@@ -17,7 +17,7 @@ EXAMPLE_OSS_PKG_INFO_LINK = "https://github.com/fosslight/fosslight_prechecker/b
17
17
 
18
18
 
19
19
  def parsing_yml(yaml_file, base_path, print_log=True):
20
- oss_list = []
20
+ fileitems = []
21
21
  license_list = []
22
22
  idx = 1
23
23
  err_reason = ""
@@ -38,37 +38,65 @@ def parsing_yml(yaml_file, base_path, print_log=True):
38
38
  err_reason = "empty"
39
39
  if print_log:
40
40
  _logger.warning(f"The yaml file is empty file: {yaml_file}")
41
- return oss_list, license_list, err_reason
41
+ return fileitems, license_list, err_reason
42
42
 
43
43
  is_old_format = any(x in doc for x in OLD_YAML_ROOT_ELEMENT)
44
44
 
45
+ filepath_list = []
45
46
  for root_element in doc:
46
47
  oss_items = doc[root_element]
47
48
  if oss_items:
48
49
  if not isinstance(oss_items, list) or 'version' not in oss_items[0]:
49
50
  raise AttributeError(f"- Ref. {EXAMPLE_OSS_PKG_INFO_LINK}")
50
51
  for oss in oss_items:
51
- item = OssItem(relative_path)
52
- if not is_old_format:
53
- item.name = root_element
54
- for key, value in oss.items():
55
- if key:
56
- key = key.lower().strip()
57
- set_value_switch(item, key, value, yaml_file)
58
- oss_list.append(item)
59
- license_list.extend(item.license)
60
- idx += 1
52
+ source_paths = get_source_name_or_path_in_yaml(oss)
53
+ for source_path in source_paths:
54
+ if os.path.join(relative_path, source_path) not in filepath_list:
55
+ filepath_list.append(os.path.join(relative_path, source_path))
56
+ fileitem = FileItem(relative_path)
57
+ fileitem.source_name_or_path = source_path
58
+ fileitems.append(fileitem)
59
+ else:
60
+ fileitem = next((i for i in fileitems if i.source_name_or_path == source_path), None)
61
+ ossitem = OssItem()
62
+ if not is_old_format:
63
+ ossitem.name = root_element
64
+ for key, value in oss.items():
65
+ if key:
66
+ key = key.lower().strip()
67
+ set_value_switch(ossitem, key, value, yaml_file)
68
+ fileitem.oss_items.append(ossitem)
69
+ license_list.extend(ossitem.license)
70
+ idx += 1
61
71
  except AttributeError as ex:
62
72
  if print_log:
63
73
  _logger.warning(f"Not supported yaml file format: {yaml_file} {ex}")
64
- oss_list = []
74
+ fileitems = []
65
75
  err_reason = "not_supported"
66
76
  except yaml.YAMLError:
67
77
  if print_log:
68
78
  _logger.warning(f"Error to parse yaml - skip to parse yaml file: {yaml_file}")
69
- oss_list = []
79
+ fileitems = []
70
80
  err_reason = "yaml_error"
71
- return oss_list, set(license_list), err_reason
81
+
82
+ return fileitems, set(license_list), err_reason
83
+
84
+
85
def get_source_name_or_path_in_yaml(oss):
    """Return the source/binary paths declared by one OSS entry of a YAML file.

    The first key of ``oss`` that names a path field is used. Keys are
    compared after lower-casing and stripping whitespace, matching how the
    other YAML keys are normalised before being dispatched.

    Args:
        oss: Mapping of YAML keys to values for a single OSS entry.

    Returns:
        A new list of paths. ``['']`` is returned when no path key exists or
        when its value is empty/``None``, so the caller always gets at least
        one entry to attach the OSS information to.
    """
    path_keys = ('file name or path', 'source name or path', 'source path',
                 'file', 'binary name', 'binary path')
    for key, value in oss.items():
        # YAML keys may be None or non-strings; those can never be a path key.
        if not isinstance(key, str) or key.lower().strip() not in path_keys:
            continue
        if isinstance(value, list):
            # Copy so callers cannot mutate the parsed YAML document.
            paths = list(value)
        elif value is None:
            paths = []
        else:
            paths = [value]
        return paths or ['']
    return ['']
72
100
 
73
101
 
74
102
  def find_sbom_yaml_files(path_to_find):
@@ -101,9 +129,6 @@ def set_value_switch(oss, key, value, yaml_file=""):
101
129
  oss.download_location = value
102
130
  elif key in ['license', 'license text']:
103
131
  oss.license = value
104
- elif key in ['file name or path', 'source name or path', 'source path',
105
- 'file', 'binary name', 'binary path']:
106
- oss.source_name_or_path = value
107
132
  elif key in ['copyright text', 'copyright']:
108
133
  oss.copyright = value
109
134
  elif key == 'exclude':
@@ -112,16 +137,6 @@ def set_value_switch(oss, key, value, yaml_file=""):
112
137
  oss.comment = value
113
138
  elif key == 'homepage':
114
139
  oss.homepage = value
115
- elif key == 'yocto_package':
116
- oss.yocto_package = value
117
- elif key == 'yocto_recipe':
118
- oss.yocto_recipe = value
119
- elif key == 'vulnerability link':
120
- oss.bin_vulnerability = value
121
- elif key == 'tlsh':
122
- oss.bin_tlsh = value
123
- elif key == 'sha1':
124
- oss.bin_sha1 = value
125
140
  else:
126
141
  if yaml_file != "":
127
142
  _logger.debug(f"file:{yaml_file} - key:{key} cannot be parsed")
@@ -4,27 +4,24 @@
4
4
  # SPDX-License-Identifier: Apache-2.0
5
5
  import logging
6
6
  from typing import List, Dict, Any
7
- import xlrd
7
+ import pandas as pd
8
8
  import json
9
9
  from fosslight_util.constant import LOGGER_NAME
10
- from fosslight_util.oss_item import OssItem
10
+ from fosslight_util.oss_item import OssItem, FileItem
11
11
  from fosslight_util.parsing_yaml import set_value_switch
12
12
 
13
13
  logger = logging.getLogger(LOGGER_NAME)
14
14
  IDX_CANNOT_FOUND = -1
15
15
  PREFIX_BIN = "bin"
16
16
  SHEET_PREFIX_TO_READ = ["bin", "bom", "src"]
17
- xlrd.xlsx.ensure_elementtree_imported(False, None)
18
- xlrd.xlsx.Element_has_iter = True
19
17
 
20
18
 
21
- def read_oss_report(excel_file: str, sheet_names: str = "") -> List[OssItem]:
22
- oss_report_items: List[OssItem] = []
19
+ def read_oss_report(excel_file: str, sheet_names: str = "", basepath: str = "") -> List[FileItem]:
20
+ fileitems: List[FileItem] = []
23
21
  xl_sheets: Dict[str, Any] = {}
24
22
  all_sheet_to_read: List[str] = []
25
23
  not_matched_sheet: List[str] = []
26
24
  any_sheet_matched = False
27
-
28
25
  if sheet_names:
29
26
  sheet_name_prefix_match = False
30
27
  sheet_name_to_read = sheet_names.split(",")
@@ -34,9 +31,8 @@ def read_oss_report(excel_file: str, sheet_names: str = "") -> List[OssItem]:
34
31
 
35
32
  try:
36
33
  logger.info(f"Read data from : {excel_file}")
37
- xl_workbook = xlrd.open_workbook(excel_file)
38
- all_sheet_in_excel = xl_workbook.sheet_names()
39
-
34
+ xl_workbook = pd.ExcelFile(excel_file, engine='openpyxl')
35
+ all_sheet_in_excel = xl_workbook.sheet_names
40
36
  for sheet_to_read in sheet_name_to_read:
41
37
  try:
42
38
  any_sheet_matched = False
@@ -46,10 +42,9 @@ def read_oss_report(excel_file: str, sheet_names: str = "") -> List[OssItem]:
46
42
  sheet_name_lower = sheet_name.lower()
47
43
  if (sheet_name_prefix_match and sheet_name_lower.startswith(sheet_to_read_lower)) \
48
44
  or sheet_to_read_lower == sheet_name_lower:
49
- sheet = xl_workbook.sheet_by_name(sheet_name)
50
- if sheet:
51
- xl_sheets[sheet_name] = sheet
52
- any_sheet_matched = True
45
+ sheet = pd.read_excel(excel_file, sheet_name=sheet_name, engine='openpyxl', na_values='')
46
+ xl_sheets[sheet_name] = sheet.fillna('')
47
+ any_sheet_matched = True
53
48
  if not any_sheet_matched:
54
49
  not_matched_sheet.append(sheet_to_read)
55
50
  except Exception as error:
@@ -62,6 +57,7 @@ def read_oss_report(excel_file: str, sheet_names: str = "") -> List[OssItem]:
62
57
  elif (not sheet_name_prefix_match) and not_matched_sheet:
63
58
  logger.warning(f"Not matched sheet name: {not_matched_sheet}")
64
59
 
60
+ filepath_list = []
65
61
  for sheet_name, xl_sheet in xl_sheets.items():
66
62
  _item_idx = {
67
63
  "ID": IDX_CANNOT_FOUND,
@@ -82,46 +78,44 @@ def read_oss_report(excel_file: str, sheet_names: str = "") -> List[OssItem]:
82
78
  "TLSH": IDX_CANNOT_FOUND,
83
79
  "SHA1": IDX_CANNOT_FOUND
84
80
  }
85
- num_cols = xl_sheet.ncols
86
- num_rows = xl_sheet.nrows
87
- MAX_FIND_HEADER_COLUMN = 5 if num_rows > 5 else num_rows
88
- DATA_START_ROW_IDX = 1
89
- for row_idx in range(0, MAX_FIND_HEADER_COLUMN):
90
- for col_idx in range(row_idx, num_cols):
91
- cell_obj = xl_sheet.cell(row_idx, col_idx)
92
- if cell_obj.value in _item_idx:
93
- _item_idx[cell_obj.value] = col_idx
94
81
 
95
- if len([key for key, value in _item_idx.items() if value != IDX_CANNOT_FOUND]) > 3:
96
- DATA_START_ROW_IDX = row_idx + 1
97
- break
82
+ for index, value in enumerate(xl_sheet.columns.tolist()):
83
+ _item_idx[value] = index
98
84
 
99
85
  # Get all values, iterating through rows and columns
100
86
  column_keys = json.loads(json.dumps(_item_idx))
101
87
 
102
88
  is_bin = True if sheet_name.lower().startswith(PREFIX_BIN) else False
103
89
 
104
- for row_idx in range(DATA_START_ROW_IDX, xl_sheet.nrows):
105
- item = OssItem("")
106
- item.is_binary = is_bin
90
+ for row_idx, row in xl_sheet.iterrows():
107
91
  valid_row = True
108
92
  load_data_cnt = 0
109
-
93
+ source_path = row[1]
94
+ if source_path not in filepath_list:
95
+ filepath_list.append(source_path)
96
+ fileitem = FileItem(basepath)
97
+ fileitem.source_name_or_path = source_path
98
+ fileitems.append(fileitem)
99
+ else:
100
+ fileitem = next((i for i in fileitems if i.source_name_or_path == source_path), None)
101
+ fileitem.is_binary = is_bin
102
+ ossitem = OssItem()
110
103
  for column_key, column_idx in column_keys.items():
111
104
  if column_idx != IDX_CANNOT_FOUND:
112
- cell_obj = xl_sheet.cell(row_idx, column_idx)
113
- cell_value = cell_obj.value
105
+ cell_obj = xl_sheet.iloc[row_idx, column_idx]
106
+ cell_value = cell_obj
107
+
114
108
  if cell_value != "":
115
109
  if column_key != "ID":
116
110
  if column_key:
117
111
  column_key = column_key.lower().strip()
118
- set_value_switch(item, column_key, cell_value)
112
+ set_value_switch(ossitem, column_key, cell_value)
119
113
  load_data_cnt += 1
120
114
  else:
121
115
  valid_row = False if cell_value == "-" else True
122
116
  if valid_row and load_data_cnt > 0:
123
- oss_report_items.append(item)
117
+ fileitem.oss_items.append(ossitem)
124
118
 
125
119
  except Exception as error:
126
120
  logger.error(f"Parsing a OSS Report: {error}")
127
- return oss_report_items
121
+ return fileitems